Commit a59e427

tweak recipe

1 parent a125b92 commit a59e427

4 files changed: +42 -6 lines changed

recipes/A5000_24GB_x8/Mistral-7B-v0.1/coder-math-ja-wikipedia.yaml (+3 -3)

@@ -10,7 +10,7 @@ dataset_train_split_test_size: 0.2
 lora_r: 8
 lora_alpha: 16
 lora_dropout: 0.05
-train_claim_gpu_num: 4
-train_per_device_train_batch_size: 8
-train_gradient_accumulation_steps: 2
+train_claim_gpu_num: 8
+train_per_device_train_batch_size: 1
+train_gradient_accumulation_steps: 16
 train_num_train_epochs: 4
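
Assuming the usual meaning of these fields (per-device batch size × number of data-parallel GPUs × gradient-accumulation steps = effective global batch size), a quick check of what the tweak does; the reading of train_claim_gpu_num as the data-parallel world size is an assumption, not something the diff states.

# Sketch: effective batch size before and after the tweak.
# Assumes train_claim_gpu_num == number of data-parallel workers (an assumption).
old_effective = 8 * 4 * 2    # batch 8 per device, 4 GPUs, 2 accumulation steps -> 64
new_effective = 1 * 8 * 16   # batch 1 per device, 8 GPUs, 16 accumulation steps -> 128
print(old_effective, new_effective)  # 64 128

Under that reading, per-GPU activation memory should drop sharply (batch 1 instead of 8) while the effective batch size doubles from 64 to 128.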

recipes/A5000_24GB_x8/Mistral-7B-v0.1/coder-python-ja-wikipedia-amenokaku.yaml (+5 -3)

@@ -3,6 +3,8 @@ base_model_id: yuiseki/Mistral-7B-v0.1-ja-wikipedia-v0.1
 model_name: Mistral-7B-v0.1-ja-wikipedia-amenokaku-v0.1
 output_base_dir: /data/output
 dataset_id: kunishou/amenokaku-code-instruct
+dataset_filter_field_name: liscence
+dataset_filter_field_value: MIT
 dataset_input_field_name: instruction
 dataset_context_field_name: input
 dataset_output_field_name: output
@@ -11,7 +13,7 @@ dataset_train_split_test_size: 0.2
 lora_r: 8
 lora_alpha: 16
 lora_dropout: 0.05
-train_claim_gpu_num: 4
-train_per_device_train_batch_size: 8
-train_gradient_accumulation_steps: 4
+train_claim_gpu_num: 8
+train_per_device_train_batch_size: 1
+train_gradient_accumulation_steps: 16
 train_num_train_epochs: 4
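
The two new dataset_filter_* keys read like a license filter on kunishou/amenokaku-code-instruct, keeping only MIT-licensed rows ("liscence" is spelled exactly as in the recipe's filter field). A minimal sketch of such a filter with the Hugging Face datasets library, assuming that is how the recipe's loader interprets these fields:

from datasets import load_dataset

# Hypothetical interpretation of dataset_filter_field_name / dataset_filter_field_value:
# keep only rows whose "liscence" column equals "MIT".
ds = load_dataset("kunishou/amenokaku-code-instruct", split="train")
ds = ds.filter(lambda row: row["liscence"] == "MIT")
print(len(ds))  # number of MIT-licensed examples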

(new file, +18)

@@ -0,0 +1,18 @@
+target_task: tasks/i18n/ja.md
+base_model_id: yuiseki/Mistral-7B-v0.1-ja-wikipedia-v0.1
+model_name: Mistral-7B-v0.1-ja-wikipedia-databricks-dolly-v0.1
+output_base_dir: /data/output
+dataset_id: llm-jp/databricks-dolly-15k-ja
+dataset_context_field_name: context
+dataset_input_field_name: instruction
+dataset_output_field_name: response
+dataset_filter_field_value: 0
+dataset_train_split_seed: 42
+dataset_train_split_test_size: 0.2
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+train_claim_gpu_num: 8
+train_per_device_train_batch_size: 1
+train_gradient_accumulation_steps: 16
+train_num_train_epochs: 4
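
All of these recipes share the same adapter settings (lora_r: 8, lora_alpha: 16, lora_dropout: 0.05). A minimal sketch of how those three knobs could map onto a PEFT LoraConfig, assuming that is the library behind them; the training script itself is not part of this commit, and target_modules is left to PEFT's defaults here:

from peft import LoraConfig

# Sketch: the recipe's LoRA fields as PEFT LoraConfig arguments (assumed mapping).
lora_config = LoraConfig(
    r=8,                # lora_r
    lora_alpha=16,      # lora_alpha
    lora_dropout=0.05,  # lora_dropout
    task_type="CAUSAL_LM",
)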

(new file, +16)

@@ -0,0 +1,16 @@
+target_task: tasks/i18n/ja.md
+base_model_id: yuiseki/Mistral-7B-v0.1-ja-wikipedia-v0.1
+model_name: yuiseki/Mistral-7B-v0.1-ja-wikipedia-jimba-v0.1
+output_base_dir: /data/output
+dataset_id: Kendamarron/jimba-instuction-1k-beta
+dataset_input_field_name: instruction
+dataset_output_field_name: output
+dataset_train_split_seed: 42
+dataset_train_split_test_size: 0.2
+lora_r: 8
+lora_alpha: 16
+lora_dropout: 0.05
+train_claim_gpu_num: 4
+train_per_device_train_batch_size: 8
+train_gradient_accumulation_steps: 4
+train_num_train_epochs: 4
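
Every recipe here pairs dataset_train_split_seed: 42 with dataset_train_split_test_size: 0.2, i.e. a reproducible 80/20 train/eval split. A minimal sketch assuming those fields feed datasets' train_test_split (an assumed mapping; the loader code is not in this diff), using the jimba dataset from the last recipe:

from datasets import load_dataset

# Hypothetical: seeded 80/20 split driven by the recipe's split fields.
ds = load_dataset("Kendamarron/jimba-instuction-1k-beta", split="train")
splits = ds.train_test_split(test_size=0.2, seed=42)
train_ds, eval_ds = splits["train"], splits["test"]
print(len(train_ds), len(eval_ds))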
