
Commit

init
Spico197 committed Jul 24, 2023
1 parent 9189aae commit b38db8c
Showing 27 changed files with 1,171 additions and 1 deletion.
2 changes: 2 additions & 0 deletions .gitignore
@@ -158,3 +158,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

debug.py
18 changes: 18 additions & 0 deletions .pre-commit-config.yaml
@@ -0,0 +1,18 @@
repos:
  - repo: https://github.com/pycqa/isort
    rev: 5.12.0
    hooks:
      - id: isort
        name: isort (python)
        args: ["--profile", "black", "--filter-files"]
  - repo: https://github.com/psf/black
    rev: 22.12.0
    hooks:
      - id: black
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.4.0
    hooks:
      - id: trailing-whitespace
      - id: end-of-file-fixer
      - id: check-yaml
      - id: check-added-large-files
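These hooks would presumably be enabled locally with `pre-commit install` after installing the `dev` extras; the `pre` target in the Makefile below runs the same checks over the whole tree via `pre-commit run --all-files`.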
30 changes: 30 additions & 0 deletions Makefile
@@ -0,0 +1,30 @@
all: format clean pre test
	echo 'finished'

.PHONY: format
format:
	isort --profile black --filter-files .
	black .

.PHONY: test
test:
	coverage run --source smoe -m pytest -vv .
	coverage report -m
	flake8

.PHONY: pre
pre:
	pre-commit run --all-files

.PHONY: debug
debug:
	pytest -vv tests/tasks/test_re.py

.PHONY: clean
clean:
	rm -rf build/
	rm -rf dist/
	rm -rf *.egg-info/
	rm -f .coverage
	rm -f coverage.xml
	find . | grep -E '(__pycache__|\.pyc|\.pyo$$)' | xargs rm -rf
12 changes: 11 additions & 1 deletion README.md
@@ -1 +1,11 @@
# smoe

## For developers

- Make sure the Python version is `>=3.10` (a strict version constraint for better type hinting)

```bash
$ pip install -e .[dev]
```
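As an aside on the `>=3.10` requirement, the style of typing it enables looks roughly like the sketch below; the function and names are purely illustrative and not part of this commit.

```python
def pick_expert(scores: dict[str, float], threshold: float | None = None) -> str | None:
    """Return the highest-scoring expert name, or None if nothing clears the threshold."""
    best = max(scores, key=scores.get, default=None)  # built-in generics, PEP 604 unions
    if best is None or (threshold is not None and scores[best] < threshold):
        return None
    return best
```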


1 change: 1 addition & 0 deletions VERSION
@@ -0,0 +1 @@
0.0.0
20 changes: 20 additions & 0 deletions conf/deepspeed/bf16.json
@@ -0,0 +1,20 @@
{
    "bf16": {
        "enabled": true
    },
    "zero_optimization": {
        "stage": 2,
        "allgather_partitions": true,
        "allgather_bucket_size": 1e8,
        "overlap_comm": true,
        "reduce_scatter": true,
        "reduce_bucket_size": 1e8,
        "contiguous_gradients": true
    },
    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "steps_per_print": 2000,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false
}
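For context, the `"auto"` values are placeholders that the HuggingFace Trainer's DeepSpeed integration resolves from the training arguments when the file is passed via `--deepspeed` (as in the `scripts/cpt` scripts below). A minimal sketch of that wiring, with illustrative values only:

```python
from transformers import TrainingArguments

# The HF DeepSpeed integration fills the "auto" fields (micro batch size,
# gradient accumulation, gradient clipping, total batch size) from these args.
args = TrainingArguments(
    output_dir="output_dir",              # illustrative path
    deepspeed="conf/deepspeed/bf16.json",
    bf16=True,
    per_device_train_batch_size=1,
    gradient_accumulation_steps=8,
)
```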
26 changes: 26 additions & 0 deletions conf/deepspeed/fp16.json
@@ -0,0 +1,26 @@
{
    "fp16": {
        "enabled": "auto",
        "loss_scale": 0,
        "loss_scale_window": 100,
        "initial_scale_power": 16,
        "hysteresis": 2,
        "min_loss_scale": 1e-10
    },
    "zero_optimization": {
        "stage": 2,
        "allgather_partitions": true,
        "allgather_bucket_size": 1e8,
        "overlap_comm": true,
        "reduce_scatter": true,
        "reduce_bucket_size": 1e8,
        "contiguous_gradients": true
    },

    "gradient_accumulation_steps": "auto",
    "gradient_clipping": "auto",
    "steps_per_print": 2000,
    "train_batch_size": "auto",
    "train_micro_batch_size_per_gpu": "auto",
    "wall_clock_breakdown": false
}
6 changes: 6 additions & 0 deletions requirements.txt
@@ -0,0 +1,6 @@
scikit-learn>=1.3.0
omegaconf>=2.0.6
tqdm>=4.65.0
datasets>=2.13.1
transformers>=4.30.2
peft>=0.4.0
4 changes: 4 additions & 0 deletions scripts/cpt/README.md
@@ -0,0 +1,4 @@
# Scripts for Continual Pre-training

- `lora.sh`: Parameter-efficient tuning
- `fpt.sh`: Full-parameter pretraining
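Both scripts carry `#SBATCH` headers, so on a Slurm cluster they would presumably be submitted with `sbatch scripts/cpt/fpt.sh` or `sbatch scripts/cpt/lora.sh` rather than executed directly.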
74 changes: 74 additions & 0 deletions scripts/cpt/fpt.sh
@@ -0,0 +1,74 @@
#!/usr/bin/bash

#SBATCH --job-name=cpt-bf16-2nodes-woLora
#SBATCH --partition=MoE
#SBATCH --output=logs/%x.log
#SBATCH --error=logs/%x.log

#SBATCH --nodes=2
#SBATCH --ntasks-per-node=1
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task=8

source ~/anaconda3/bin/activate torch

lr=2e-4

pretrained_model=/mnt/petrelfs/share_data/quxiaoye/models/llama_7B/
tokenizer_path=/mnt/petrelfs/share_data/quxiaoye/models/llama_7B/
dataset_dir=resources
data_cache=temp_data_cache_dir
per_device_train_batch_size=1
per_device_eval_batch_size=1
gradient_accumulation_steps=8
output_dir=output_dir_cpt_ymcui

deepspeed_config_file=conf/ds_bf16.json

nodes=( $( scontrol show hostnames $SLURM_JOB_NODELIST ) )
nodes_array=($nodes)
head_node=${nodes_array[0]}
head_node_ip=$(srun --nodes=1 --ntasks=1 -w "$head_node" hostname --ip-address)
echo "Node: $head_node"
echo "Node IP: $head_node_ip"
export LOGLEVEL=INFO

srun torchrun \
--nnodes 2 \
--nproc_per_node 8 \
--node_rank $SLURM_NODEID \
--rdzv_id $RANDOM \
--rdzv_backend c10d \
--rdzv_endpoint $head_node:29518 \
src/entrypoint/run_clm_pt_wo_peft.py \
--deepspeed ${deepspeed_config_file} \
--model_name_or_path ${pretrained_model} \
--tokenizer_name_or_path ${tokenizer_path} \
--dataset_dir ${dataset_dir} \
--data_cache_dir ${data_cache} \
--validation_split_percentage 0.001 \
--per_device_train_batch_size ${per_device_train_batch_size} \
--per_device_eval_batch_size ${per_device_eval_batch_size} \
--do_train \
--seed $RANDOM \
--bf16 \
--num_train_epochs 1 \
--lr_scheduler_type cosine \
--learning_rate ${lr} \
--warmup_ratio 0.05 \
--weight_decay 0.01 \
--logging_strategy steps \
--logging_steps 10 \
--save_strategy steps \
--save_total_limit 3 \
--save_steps 200 \
--gradient_accumulation_steps ${gradient_accumulation_steps} \
--preprocessing_num_workers 8 \
--block_size 512 \
--output_dir ${output_dir} \
--overwrite_output_dir \
--ddp_timeout 30000 \
--logging_first_step True \
--torch_dtype bfloat16 \
--gradient_checkpointing \
--ddp_find_unused_parameters False
84 changes: 84 additions & 0 deletions scripts/cpt/lora.sh
@@ -0,0 +1,84 @@
#!/usr/bin/bash

#SBATCH --job-name=cpt-lora-bf16-2nodes
#SBATCH --partition=MoE
#SBATCH --output=logs/%x.log
#SBATCH --error=logs/%x.log

#SBATCH --nodes=2
#SBATCH --ntasks-per-node=1
#SBATCH --gres=gpu:8
#SBATCH --cpus-per-task=8

source ~/anaconda3/bin/activate torch

lr=2e-4
lora_rank=8
lora_alpha=32
lora_trainable="q_proj,v_proj,k_proj,o_proj,gate_proj,down_proj,up_proj"
modules_to_save="embed_tokens,lm_head"
lora_dropout=0.05

pretrained_model=/mnt/petrelfs/share_data/quxiaoye/models/llama_7B/
tokenizer_path=/mnt/petrelfs/share_data/quxiaoye/models/llama_7B/
dataset_dir=resources
data_cache=temp_data_cache_dir
per_device_train_batch_size=1
per_device_eval_batch_size=1
gradient_accumulation_steps=8
output_dir=output_dir

deepspeed_config_file=conf/ds_bf16.json

nodes=( $( scontrol show hostnames $SLURM_JOB_NODELIST ) )
nodes_array=($nodes)
head_node=${nodes_array[0]}
head_node_ip=$(srun --nodes=1 --ntasks=1 -w "$head_node" hostname --ip-address)
echo "Node: $head_node"
echo "Node IP: $head_node_ip"
export LOGLEVEL=INFO

srun torchrun \
--nnodes 2 \
--nproc_per_node 8 \
--node_rank $SLURM_NODEID \
--rdzv_id $RANDOM \
--rdzv_backend c10d \
--rdzv_endpoint $head_node:29518 \
src/entrypoint/run_clm_pt_with_peft.py \
--deepspeed ${deepspeed_config_file} \
--model_name_or_path ${pretrained_model} \
--tokenizer_name_or_path ${tokenizer_path} \
--dataset_dir ${dataset_dir} \
--data_cache_dir ${data_cache} \
--validation_split_percentage 0.001 \
--per_device_train_batch_size ${per_device_train_batch_size} \
--per_device_eval_batch_size ${per_device_eval_batch_size} \
--do_train \
--seed $RANDOM \
--bf16 \
--num_train_epochs 1 \
--lr_scheduler_type cosine \
--learning_rate ${lr} \
--warmup_ratio 0.05 \
--weight_decay 0.01 \
--logging_strategy steps \
--logging_steps 10 \
--save_strategy steps \
--save_total_limit 3 \
--save_steps 200 \
--gradient_accumulation_steps ${gradient_accumulation_steps} \
--preprocessing_num_workers 8 \
--block_size 512 \
--output_dir ${output_dir} \
--overwrite_output_dir \
--ddp_timeout 30000 \
--logging_first_step True \
--lora_rank ${lora_rank} \
--lora_alpha ${lora_alpha} \
--trainable ${lora_trainable} \
--modules_to_save ${modules_to_save} \
--lora_dropout ${lora_dropout} \
--torch_dtype float16 \
--gradient_checkpointing \
--ddp_find_unused_parameters False
49 changes: 49 additions & 0 deletions setup.py
@@ -0,0 +1,49 @@
import os

import setuptools

readme_filepath = os.path.join(os.path.dirname(os.path.abspath(__file__)), "README.md")
with open(readme_filepath, "r") as fh:
    long_description = fh.read()

version_filepath = os.path.join(os.path.dirname(os.path.abspath(__file__)), "VERSION")
with open(version_filepath, "r") as fh:
    version = fh.read().strip()

setuptools.setup(
    name="smoe",
    version=version,
    author="MoE Group",
    author_email="[email protected]",
    description="A toolkit for LLM MoE and continual pretraining.",
    long_description_content_type="text/markdown",
    long_description=long_description,
    url="https://github.com/Spico197/smoe",
    packages=setuptools.find_packages(exclude=["tests", "tests.*", "docs", "docs.*"]),
    classifiers=[
        "Programming Language :: Python :: 3",
        "License :: OSI Approved :: Apache Software License",
        "Operating System :: OS Independent",
    ],
    python_requires=">=3.10",
    install_requires=[
        "scikit-learn>=1.3.0",
        "omegaconf>=2.0.6",
        "tqdm>=4.65.0",
        "datasets>=2.13.1",
        "transformers>=4.30.2",
        "peft>=0.4.0",
    ],
    extras_require={
        "dev": [
            "pytest",
            "coverage",
            "black",
            "isort",
            "flake8",
            "pre-commit",
        ]
    },
    include_package_data=True,
    entry_points={},
)
Empty file added smoe/__init__.py
Empty file.
Empty file added smoe/callbacks/__init__.py
Empty file.
32 changes: 32 additions & 0 deletions smoe/callbacks/save_peft_model.py
@@ -0,0 +1,32 @@
import os

from transformers import TrainerCallback
from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR


class SavePeftModelCallback(TrainerCallback):
    """Save the PEFT adapter and tokenizer alongside Trainer checkpoints."""

    def __init__(self, peft_model_subdir: str = "peft_model"):
        self.peft_model_subdir = peft_model_subdir

    def save_model(self, args, state, **kwargs):
        # Save next to the best checkpoint if one is tracked, otherwise into
        # the checkpoint folder for the current global step.
        if state.best_model_checkpoint is not None:
            checkpoint_folder = os.path.join(
                state.best_model_checkpoint, self.peft_model_subdir
            )
        else:
            checkpoint_folder = os.path.join(
                args.output_dir, f"{PREFIX_CHECKPOINT_DIR}-{state.global_step}"
            )

        peft_model_path = os.path.join(checkpoint_folder, self.peft_model_subdir)
        kwargs["model"].save_pretrained(peft_model_path)
        kwargs["tokenizer"].save_pretrained(peft_model_path)

    def on_save(self, args, state, control, **kwargs):
        self.save_model(args, state, **kwargs)
        return control

    def on_train_end(self, args, state, control, **kwargs):
        # Write a final copy of the adapter and tokenizer to the output directory.
        peft_model_path = os.path.join(args.output_dir, self.peft_model_subdir)
        kwargs["model"].save_pretrained(peft_model_path)
        kwargs["tokenizer"].save_pretrained(peft_model_path)
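A rough sketch of how this callback might be hooked into a `transformers.Trainer`; `peft_model`, `tokenizer`, and `train_dataset` are placeholders assumed to be built elsewhere (e.g. by the LoRA script above), not part of this commit.

```python
from transformers import Trainer, TrainingArguments

from smoe.callbacks.save_peft_model import SavePeftModelCallback

# `peft_model`, `tokenizer`, and `train_dataset` are assumed to exist already
# (e.g. a LoRA-wrapped LLaMA model); shown only to illustrate the callback hookup.
trainer = Trainer(
    model=peft_model,
    args=TrainingArguments(output_dir="output_dir", save_steps=200),
    train_dataset=train_dataset,
    tokenizer=tokenizer,
    callbacks=[SavePeftModelCallback(peft_model_subdir="peft_model")],
)
trainer.train()  # adapter weights and tokenizer are saved next to each checkpoint
```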
Empty file added smoe/data/__init__.py
Empty file.
