
Commit 5984454 ("WIP")
Parent: 5acde3a
12 files changed: +26, -29 lines

recipes/A5000_24GB_x8/i18n-bn-wikipedia.yaml (+3, -5)

```diff
@@ -11,11 +11,9 @@ lora_r: 8
 lora_alpha: 16
 lora_dropout: 0.05
 train_claim_gpu_num: 4
-train_per_device_train_batch_size: 8
-train_gradient_accumulation_steps: 4
-train_num_train_epochs: 4
-train_max_steps: 2000
-train_fp16: True
+train_per_device_train_batch_size: 4
+train_gradient_accumulation_steps: 64
+train_num_train_epochs: 2
 inference_max_new_tokens: 32
 evaluations:
 -
```
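For context, the effective global batch size is per-device batch × gradient-accumulation steps × GPU count, so this change trades a shorter schedule (2 epochs, no max-steps cap) for much larger effective batches. A minimal sketch of that arithmetic (the helper below is illustrative, not repo code):

```python
# Effective global batch = per-device batch * grad-accum steps * GPU count.
def effective_batch(per_device: int, grad_accum: int, gpus: int) -> int:
    return per_device * grad_accum * gpus

print(effective_batch(per_device=8, grad_accum=4, gpus=4))   # old settings: 128
print(effective_batch(per_device=4, grad_accum=64, gpus=4))  # new settings: 1024
```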

recipes/A5000_24GB_x8/i18n-es-wikipedia.yaml (+1, -2)

```diff
@@ -4,8 +4,7 @@ model_name: tinyllama-es-wikipedia-1.5T-v0.1
 output_base_dir: /data/output
 dataset_id: wikimedia/wikipedia
 dataset_load_config: 20231101.es
-dataset_input_field_name: title
-dataset_output_field_name: text
+dataset_input_field_name: text
 dataset_train_split_seed: 42
 dataset_train_split_test_size: 0.2
 lora_r: 8
```
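The recipes now train on the article body as a single input field instead of mapping "title" to "text" as an input/output pair. A self-contained sketch of what that field looks like in the Hugging Face dataset (illustrative, not repo code; the full split is a large download):

```python
from datasets import load_dataset

# Same dataset and config the recipe points at.
data = load_dataset("wikimedia/wikipedia", "20231101.es", split="train")

# "text" is the article body the recipes now use as the input field;
# "title" is no longer mapped as a separate output field.
print(data[0]["text"][:200])
```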

recipes/A5000_24GB_x8/i18n-fr-wikipedia.yaml (+1, -2)

```diff
@@ -4,8 +4,7 @@ model_name: tinyllama-fr-wikipedia-1.5T-v0.1
 output_base_dir: /data/output
 dataset_id: wikimedia/wikipedia
 dataset_load_config: 20231101.fr
-dataset_input_field_name: title
-dataset_output_field_name: text
+dataset_input_field_name: text
 dataset_train_split_seed: 42
 dataset_train_split_test_size: 0.2
 lora_r: 8
```

recipes/A5000_24GB_x8/i18n-it-wikipedia.yaml (+1, -2)

```diff
@@ -4,8 +4,7 @@ model_name: tinyllama-it-wikipedia-1.5T-v0.1
 output_base_dir: /data/output
 dataset_id: wikimedia/wikipedia
 dataset_load_config: 20231101.it
-dataset_input_field_name: title
-dataset_output_field_name: text
+dataset_input_field_name: text
 dataset_train_split_seed: 42
 dataset_train_split_test_size: 0.2
 lora_r: 8
```

recipes/A5000_24GB_x8/i18n-ja-wikipedia.yaml (+2, -3)

```diff
@@ -4,16 +4,15 @@ model_name: tinyllama-ja-wikipedia-1.5T-v0.1
 output_base_dir: /data/output
 dataset_id: wikimedia/wikipedia
 dataset_load_config: 20231101.ja
-dataset_input_field_name: title
-dataset_output_field_name: text
+dataset_input_field_name: text
 dataset_train_split_seed: 42
 dataset_train_split_test_size: 0.2
 lora_r: 8
 lora_alpha: 16
 lora_dropout: 0.05
 train_claim_gpu_num: 4
 train_per_device_train_batch_size: 4
-train_gradient_accumulation_steps: 64
+train_gradient_accumulation_steps: 16
 train_num_train_epochs: 2
 inference_max_new_tokens: 32
 evaluations:
```
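ja keeps the per-device batch of 4 but drops gradient accumulation to 16, i.e. an effective batch of 4 × 16 × 4 = 256; it is also one of the splits subsampled to every third article in src/train.py below. A back-of-the-envelope sketch of the resulting optimizer steps per epoch (the dataset size is hypothetical, purely illustrative):

```python
# Illustrative numbers only: n_articles is hypothetical, not from the repo.
n_articles = 1_400_000            # pretend size of the ja train split
kept = n_articles // 3            # train.py keeps every 3rd example for ja
effective_batch = 4 * 16 * 4      # per-device * grad-accum * GPUs = 256
print(kept // effective_batch)    # approximate optimizer steps per epoch
```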

recipes/A5000_24GB_x8/i18n-jv-wikipedia.yaml (+3, -5)

```diff
@@ -11,11 +11,9 @@ lora_r: 8
 lora_alpha: 16
 lora_dropout: 0.05
 train_claim_gpu_num: 4
-train_per_device_train_batch_size: 8
-train_gradient_accumulation_steps: 4
-train_num_train_epochs: 4
-train_max_steps: 2000
-train_fp16: True
+train_per_device_train_batch_size: 4
+train_gradient_accumulation_steps: 64
+train_num_train_epochs: 2
 inference_max_new_tokens: 32
 evaluations:
 -
```

recipes/A5000_24GB_x8/i18n-ko-wikipedia.yaml (+1, -2)

```diff
@@ -4,8 +4,7 @@ model_name: tinyllama-ko-wikipedia-1.5T-v0.1
 output_base_dir: /data/output
 dataset_id: wikimedia/wikipedia
 dataset_load_config: 20231101.ko
-dataset_input_field_name: title
-dataset_output_field_name: text
+dataset_input_field_name: text
 dataset_train_split_seed: 42
 dataset_train_split_test_size: 0.2
 lora_r: 8
```

recipes/A5000_24GB_x8/i18n-mg-wikipedia.yaml (+1, -2)

```diff
@@ -4,8 +4,7 @@ model_name: tinyllama-mg-wikipedia-1.5T-v0.1
 output_base_dir: /data/output
 dataset_id: wikimedia/wikipedia
 dataset_load_config: 20231101.mg
-dataset_input_field_name: title
-dataset_output_field_name: text
+dataset_input_field_name: text
 dataset_train_split_seed: 42
 dataset_train_split_test_size: 0.2
 lora_r: 8
```

recipes/A5000_24GB_x8/i18n-ru-wikipedia.yaml (+1, -2)

```diff
@@ -4,8 +4,7 @@ model_name: tinyllama-ru-wikipedia-1.5T-v0.1
 output_base_dir: /data/output
 dataset_id: wikimedia/wikipedia
 dataset_load_config: 20231101.ru
-dataset_input_field_name: title
-dataset_output_field_name: text
+dataset_input_field_name: text
 dataset_train_split_seed: 42
 dataset_train_split_test_size: 0.2
 lora_r: 8
```

recipes/A5000_24GB_x8/i18n-vi-wikipedia.yaml (+1, -1)

```diff
@@ -12,7 +12,7 @@ lora_alpha: 16
 lora_dropout: 0.05
 train_claim_gpu_num: 4
 train_per_device_train_batch_size: 4
-train_gradient_accumulation_steps: 64
+train_gradient_accumulation_steps: 16
 train_num_train_epochs: 2
 inference_max_new_tokens: 32
 evaluations:
```

recipes/A5000_24GB_x8/i18n-zh-wikipedia.yaml (+2, -3)

```diff
@@ -4,14 +4,13 @@ model_name: tinyllama-zh-wikipedia-1.5T-v0.1
 output_base_dir: /data/output
 dataset_id: wikimedia/wikipedia
 dataset_load_config: 20231101.zh
-dataset_input_field_name: title
-dataset_output_field_name: text
+dataset_input_field_name: text
 dataset_train_split_seed: 42
 dataset_train_split_test_size: 0.2
 lora_r: 8
 lora_alpha: 16
 lora_dropout: 0.05
-train_claim_gpu_num: 3
+train_claim_gpu_num: 4
 train_per_device_train_batch_size: 4
 train_gradient_accumulation_steps: 64
 train_num_train_epochs: 2
```

src/train.py (+9)

```diff
@@ -113,6 +113,13 @@ def prepare_train_data(dataset_id):
     if "dataset_load_config" in train_config:
         dataset_load_config = train_config["dataset_load_config"]
         data = load_dataset(dataset_id, dataset_load_config, split="train", num_proc=32)
+        if (
+            dataset_load_config == "20231101.ja"
+            or dataset_load_config == "20231101.vi"
+            or dataset_load_config == "20231101.es"
+            or dataset_load_config == "20231101.de"
+        ):
+            data = data.filter(lambda item, idx: idx % 3 == 0, with_indices=True)
     else:
         data = load_dataset(dataset_id, split="train", num_proc=32)
```
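The new branch thins the largest Wikipedia splits to every third article. A self-contained sketch of the same `filter(..., with_indices=True)` pattern on a toy dataset (the toy data is illustrative, not from the repo):

```python
from datasets import Dataset

# Toy dataset standing in for a large Wikipedia split.
data = Dataset.from_dict({"text": [f"article {i}" for i in range(9)]})

# Keep every 3rd example by index, as the new train.py branch does.
subsampled = data.filter(lambda item, idx: idx % 3 == 0, with_indices=True)
print(subsampled["text"])  # ['article 0', 'article 3', 'article 6']
```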

```diff
@@ -203,6 +210,8 @@ def load_model_and_tokenizer(model_id):
         torch_dtype=torch.float16,
         # Trust remote code
         trust_remote_code=True,
+        # Set low cpu mem usage
+        low_cpu_mem_usage=True,
         # Set device map to auto
         # device_map="auto",
         device_map={"": PartialState().process_index},
```
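For readers unfamiliar with these flags, here is a minimal, self-contained sketch of loading a causal LM this way (the model id and surrounding code are illustrative; only the keyword arguments mirror the diff):

```python
import torch
from accelerate import PartialState
from transformers import AutoModelForCausalLM

# Illustrative model id; the repo's actual id comes from its config.
model_id = "TinyLlama/TinyLlama-1.1B-intermediate-step-715k-1.5T"

model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    trust_remote_code=True,
    # Avoids materializing the full fp32 model on CPU before moving it to GPU.
    low_cpu_mem_usage=True,
    # Pin each process to its own GPU (one process per GPU under accelerate),
    # instead of letting device_map="auto" shard the model across devices.
    device_map={"": PartialState().process_index},
)
```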
