Commit

Merge branch 'main' into sql-dataset-support

sywangyi authored Nov 23, 2023
2 parents 346fcb6 + 13fa10d · commit b4efaa7

Showing 20 changed files with 249 additions and 61 deletions.
1 change: 0 additions & 1 deletion .github/workflows/build_pr_documentation.yml
@@ -34,7 +34,6 @@ jobs:
           cd doc-builder
           git pull origin main
           pip install .
-          pip install black
           cd ..
       - name: Make documentation
9 changes: 3 additions & 6 deletions .github/workflows/check_code_quality.yml
@@ -29,12 +29,9 @@ jobs:
       run: |
         source venv/bin/activate
         pip install --upgrade pip
-        pip install black ruff
-    - name: Check style with black
-      run: |
-        source venv/bin/activate
-        black --check .
+        pip install ruff
     - name: Check style with ruff
       run: |
         source venv/bin/activate
-        ruff .
+        ruff check . setup.py
+        ruff format --check . setup.py
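A note on the tooling change above: recent Ruff releases split the CLI into `ruff check` (linting, replacing the older bare `ruff .` invocation) and `ruff format` (a Black-compatible formatter), which is why the separate `black` install and check steps could be dropped in favor of a single tool.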
4 changes: 2 additions & 2 deletions .github/workflows/slow_tests.yml
@@ -3,8 +3,8 @@ name: Non-regression tests
 on:
   workflow_dispatch:
   schedule:
-    - cron: '0 21 * * 0-5' # every Sunday to Friday at 11pm CET
-    - cron: '0 21 * * 6' # every Saturday at 1am CET
+    - cron: '0 21 * * 0-5' # every Sunday to Friday at 11pm CET (10pm winter time)
+    - cron: '0 21 * * 6' # every Saturday at 1am CET (midnight winter time)
 
 concurrency:
   group: ${{ github.workflow }}
142 changes: 142 additions & 0 deletions .github/workflows/slow_tests_gaudi2.yml
@@ -0,0 +1,142 @@
name: (Gaudi2) Non-regression tests

on:
  workflow_dispatch:
  schedule:
    - cron: '0 23 * * 3,6' # every Wednesday and Saturday at 1am CET (midnight winter time)

concurrency:
  group: ${{ github.workflow }}

jobs:
  stable-diffusion:
    name: Test Stable Diffusion
    runs-on: [self-hosted, linux, x64, gaudi2]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Pull image
        run: |
          docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
      - name: Run tests
        run: |
          docker run \
            -v $PWD:/root/workspace \
            --workdir=/root/workspace \
            --runtime=habana \
            -e HABANA_VISIBLE_DEVICES=all \
            -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
            -e GAUDI2_CI=1 \
            --cap-add=sys_nice \
            --net=host \
            --ipc=host \
            vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
            /bin/bash tests/ci/slow_tests_diffusers.sh
  deepspeed:
    name: Test DeepSpeed models
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - stable-diffusion # run the job when the previous test job is done
    runs-on: [self-hosted, linux, x64, gaudi2]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Pull image
        run: |
          docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
      - name: Run tests
        run: |
          docker run \
            -v $PWD:/root/workspace \
            --workdir=/root/workspace \
            --runtime=habana \
            -e HABANA_VISIBLE_DEVICES=all \
            -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
            -e GAUDI2_CI=1 \
            --cap-add=sys_nice \
            --net=host \
            --ipc=host \
            vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
            /bin/bash tests/ci/slow_tests_deepspeed.sh
  multi-card:
    name: Test multi-card models
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - deepspeed # run the job when the previous test job is done
    runs-on: [self-hosted, linux, x64, gaudi2]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Pull image
        run: |
          docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
      - name: Run tests
        run: |
          docker run \
            -v $PWD:/root/workspace \
            --workdir=/root/workspace \
            --runtime=habana \
            -e HABANA_VISIBLE_DEVICES=all \
            -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
            -e GAUDI2_CI=1 \
            --cap-add=sys_nice \
            --net=host \
            --ipc=host \
            vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
            /bin/bash tests/ci/slow_tests_8x.sh
  single-card:
    name: Test single-card models
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - deepspeed
      - multi-card # run the job when the previous test jobs are done
    runs-on: [self-hosted, linux, x64, gaudi2]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Pull image
        run: |
          docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
      - name: Run tests
        run: |
          docker run \
            -v $PWD:/root/workspace \
            --workdir=/root/workspace \
            --runtime=habana \
            -e HABANA_VISIBLE_DEVICES=all \
            -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
            -e GAUDI2_CI=1 \
            -e RUN_ALBERT_XXL_1X=1 \
            --cap-add=sys_nice \
            --net=host \
            --ipc=host \
            vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
            /bin/bash tests/ci/slow_tests_1x.sh
  text-generation:
    name: Test text-generation example
    if: ${{ !cancelled() && (success() || failure()) }}
    needs:
      - deepspeed
      - multi-card
      - single-card # run the job when the previous test jobs are done
    runs-on: [self-hosted, linux, x64, gaudi2]
    steps:
      - name: Checkout
        uses: actions/checkout@v2
      - name: Pull image
        run: |
          docker pull vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest
      - name: Run tests
        run: |
          docker run \
            -v $PWD:/root/workspace \
            --workdir=/root/workspace \
            --runtime=habana \
            -e HABANA_VISIBLE_DEVICES=all \
            -e OMPI_MCA_btl_vader_single_copy_mechanism=none \
            -e GAUDI2_CI=1 \
            --cap-add=sys_nice \
            --net=host \
            --ipc=host \
            vault.habana.ai/gaudi-docker/1.12.0/ubuntu20.04/habanalabs/pytorch-installer-2.0.1:latest \
            make slow_tests_text_generation_example TOKEN=${{ secrets.TEXT_GENERATION_CI_HUB_TOKEN }}
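A note on the job graph in this new workflow: the `needs` chain serializes the test suites so only one of them occupies the Gaudi2 runner at a time, and the `if: ${{ !cancelled() && (success() || failure()) }}` guard lets each stage run even when an earlier stage failed; only cancelling the whole workflow skips the remaining jobs.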
8 changes: 4 additions & 4 deletions Makefile
@@ -22,12 +22,12 @@ REAL_CLONE_URL = $(if $(CLONE_URL),$(CLONE_URL),$(DEFAULT_CLONE_URL))
 
 # Run code quality checks
 style_check: clean
-	black --check . setup.py
-	ruff . setup.py
+	ruff check . setup.py
+	ruff format --check . setup.py
 
 style: clean
-	black . setup.py
-	ruff . setup.py --fix
+	ruff check . setup.py --fix
+	ruff format . setup.py
 
 # Run unit and integration tests
 fast_tests:
1 change: 1 addition & 0 deletions examples/contrastive-image-text/README.md
@@ -207,6 +207,7 @@ python ../gaudi_spawn.py --use_mpi --world_size 8 run_bridgetower.py \
     --output_dir /tmp/bridgetower-test \
     --model_name_or_path BridgeTower/bridgetower-large-itm-mlm-itc \
     --dataset_name jmhessel/newyorker_caption_contest --dataset_config_name matching \
+    --dataset_revision 3c6c4f6c0ff7e902833d3afa5f8f3875c2b036e6 \
     --image_column image --caption_column image_description \
     --remove_unused_columns=False \
     --do_train --do_eval --do_predict \
5 changes: 5 additions & 0 deletions examples/contrastive-image-text/run_bridgetower.py
@@ -138,6 +138,10 @@ class DataTrainingArguments:
     dataset_config_name: Optional[str] = field(
         default=None, metadata={"help": "The configuration name of the dataset to use (via the datasets library)."}
     )
+    dataset_revision: str = field(
+        default="main",
+        metadata={"help": "The specific dataset version to use (can be a branch name, tag name or commit id)."},
+    )
     data_dir: Optional[str] = field(default=None, metadata={"help": "The data directory containing input files."})
     image_column: Optional[str] = field(
         default="image_path",
@@ -339,6 +343,7 @@ def main():
             keep_in_memory=False,
             data_dir=data_args.data_dir,
             token=model_args.token,
+            revision=data_args.dataset_revision,
         )
     else:
         data_files = {}
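For reference, the new `--dataset_revision` flag feeds straight into `datasets.load_dataset`, whose `revision` argument accepts a branch name, tag, or commit id. A minimal sketch of the same pinning outside the script (dataset name and hash taken from the README example above):

```python
from datasets import load_dataset

# Pin the dataset to a fixed commit so results stay reproducible
# even if the dataset repository changes upstream.
dataset = load_dataset(
    "jmhessel/newyorker_caption_contest",
    "matching",  # dataset config name
    revision="3c6c4f6c0ff7e902833d3afa5f8f3875c2b036e6",
)
```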
40 changes: 39 additions & 1 deletion examples/language-modeling/README.md
@@ -321,7 +321,8 @@ python run_clm.py \
 
 ## PEFT
 
-To run LoRA finetuning and inference. you could use `run_lora_clm.py` as an example. Multi-card examples can be simply adapted to run LoRA finetuning. Here is the CLM example with Llama1-7B and Falcon-40B:
+To run LoRA finetuning, you can use `run_lora_clm.py`.
+Here are single-/multi-device command examples for Llama1-7B, Falcon-40B and Llama2-70B:
 
 - Single-card finetuning of Llama1-7B:
 ```bash
@@ -484,6 +485,43 @@ python ../gaudi_spawn.py \
 
 ```
 
+- Multi-card finetuning of Llama2-70B with DeepSpeed ZeRO-3 optimization and LoRA:
+
+> The following command requires Habana DeepSpeed 1.13.0 or later.
+```bash
+PT_HPU_MAX_COMPOUND_OP_SIZE=10 DEEPSPEED_HPU_ZERO3_SYNC_MARK_STEP_REQUIRED=1 \
+python3 ../gaudi_spawn.py --use_deepspeed --world_size 8 run_lora_clm.py \
+  --model_name_or_path meta-llama/Llama-2-70b-hf \
+  --deepspeed llama2_ds_zero3_config.json \
+  --dataset_name tatsu-lab/alpaca \
+  --bf16 True \
+  --output_dir ./lora_out \
+  --num_train_epochs 2 \
+  --max_seq_len 2048 \
+  --per_device_train_batch_size 10 \
+  --per_device_eval_batch_size 10 \
+  --gradient_checkpointing \
+  --evaluation_strategy epoch \
+  --eval_delay 2 \
+  --save_strategy no \
+  --learning_rate 0.0018 \
+  --warmup_ratio 0.03 \
+  --lr_scheduler_type "cosine" \
+  --logging_steps 1 \
+  --dataset_concatenation \
+  --attn_softmax_bf16 True \
+  --do_train \
+  --do_eval \
+  --use_habana \
+  --use_lazy_mode \
+  --pipelining_fwd_bwd \
+  --throughput_warmup_steps 3 \
+  --lora_rank 4 \
+  --lora_target_modules "q_proj" "v_proj" "k_proj" "o_proj" \
+  --validation_split_percentage 4
+````
+
 ## Streaming
 
 To use the streaming dataset mode which can be very useful for large datasets, add `--streaming` with `--max_steps` specified in the command line. This is currently supported by `run_mlm.py` and `run_clm.py`.
15 changes: 15 additions & 0 deletions examples/language-modeling/llama2_ds_zero3_config.json
@@ -0,0 +1,15 @@
{
  "steps_per_print": 64,
  "train_batch_size": "auto",
  "train_micro_batch_size_per_gpu": "auto",
  "gradient_accumulation_steps": "auto",
  "bf16": {
    "enabled": true
  },
  "gradient_clipping": 1.0,
  "zero_optimization": {
    "stage": 3,
    "overlap_comm": false,
    "contiguous_gradients": false
  }
}
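The `"auto"` entries in this config are resolved at launch time by the Hugging Face Trainer's DeepSpeed integration from the corresponding training arguments. For example, with the README command above (`--per_device_train_batch_size 10`, world size 8, and gradient accumulation assumed to default to 1), `train_micro_batch_size_per_gpu` becomes 10 and `train_batch_size` resolves to 10 × 1 × 8 = 80.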
1 change: 0 additions & 1 deletion examples/text-generation/run_generation.py
@@ -210,7 +210,6 @@ def setup_parser(parser):
 
     if not args.use_hpu_graphs:
         args.limit_hpu_graphs = False
-        args.reuse_cache = False
 
     return args
 
@@ -417,9 +417,9 @@ def prepare_inputs_for_generation(self, input_ids, past_key_values=None, token_i
         # only last token for inputs_ids if past is defined in kwargs
         if past_key_values:
             if token_idx is not None:
-                input_ids = torch.index_select(input_ids, 1, token_idx - 1).unsqueeze(-1)
+                input_ids = torch.index_select(input_ids, 1, token_idx - 1)
                 if token_type_ids is not None:
-                    token_type_ids = torch.index_select(token_type_ids, 1, token_idx - 1).unsqueeze(-1)
+                    token_type_ids = torch.index_select(token_type_ids, 1, token_idx - 1)
             else:
                 input_ids = input_ids[:, -1].unsqueeze(-1)
                 if token_type_ids is not None:
@@ -352,9 +352,9 @@ def prepare_inputs_for_generation(
         # only last token for inputs_ids if past is defined in kwargs
         if past_key_values:
             if token_idx is not None:
-                input_ids = torch.index_select(input_ids, 1, token_idx - 1).unsqueeze(-1)
+                input_ids = torch.index_select(input_ids, 1, token_idx - 1)
                 if token_type_ids is not None:
-                    token_type_ids = torch.index_select(token_type_ids, 1, token_idx - 1).unsqueeze(-1)
+                    token_type_ids = torch.index_select(token_type_ids, 1, token_idx - 1)
             else:
                 input_ids = input_ids[:, -1].unsqueeze(-1)
                 if token_type_ids is not None:
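Both hunks above drop a stray `.unsqueeze(-1)`: assuming `token_idx` is a one-element tensor, `torch.index_select` along dim 1 already returns a 2-D `(batch, 1)` tensor, so the extra unsqueeze produced a 3-D `(batch, 1, 1)` shape. The `input_ids[:, -1]` branch is different because indexing with `-1` drops the dimension, so its unsqueeze is still needed. A minimal shape check with illustrative values:

```python
import torch

input_ids = torch.arange(12).reshape(2, 6)  # batch of 2, sequence length 6
token_idx = torch.tensor([4])               # hypothetical current decoding position

picked = torch.index_select(input_ids, 1, token_idx - 1)
print(picked.shape)                # torch.Size([2, 1]) -- what generation expects
print(picked.unsqueeze(-1).shape)  # torch.Size([2, 1, 1]) -- the old, wrong shape
```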
19 changes: 14 additions & 5 deletions pyproject.toml
@@ -12,13 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-[tool.black]
-line-length = 119
-target-version = ['py37']
-
 [tool.ruff]
 # Never enforce `E501` (line length violations).
-ignore = ["C901", "E501", "E741"]
+ignore = ["C901", "E501", "E741", "F402", "F823"]
 select = ["C", "E", "F", "I", "W"]
 line-length = 119
 exclude = ["text-generation-inference"]
@@ -30,3 +26,16 @@ exclude = ["text-generation-inference"]
 [tool.ruff.isort]
 lines-after-imports = 2
 known-first-party = ["optimum.habana"]
+
+[tool.ruff.format]
+# Like Black, use double quotes for strings.
+quote-style = "double"
+
+# Like Black, indent with spaces, rather than tabs.
+indent-style = "space"
+
+# Like Black, respect magic trailing commas.
+skip-magic-trailing-comma = false
+
+# Like Black, automatically detect the appropriate line ending.
+line-ending = "auto"
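With `[tool.ruff.format]` configured to mirror Black's defaults, the `[tool.black]` section above becomes redundant, which is also why `black` disappears from the quality dependencies in `setup.py` below.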
1 change: 0 additions & 1 deletion setup.py
@@ -48,7 +48,6 @@
 ]
 
 QUALITY_REQUIRES = [
-    "black",
     "ruff",
     "hf_doc_builder @ git+https://github.com/huggingface/doc-builder.git",
 ]
1 change: 1 addition & 0 deletions tests/baselines/bridgetower_large_itm_mlm_itc.json
@@ -11,6 +11,7 @@
         "train_samples_per_second": 921.069,
         "extra_arguments": [
             "--dataset_config_name matching",
+            "--dataset_revision 3c6c4f6c0ff7e902833d3afa5f8f3875c2b036e6",
             "--image_column image",
             "--caption_column image_description",
             "--remove_unused_columns False",