File tree 2 files changed +5
-4
lines changed
2 files changed +5
-4
lines changed Original file line number Diff line number Diff line change 1
1
target_task : tasks/i18n/ja.md
2
2
base_model_id : TinyLlama/TinyLlama-1.1B-intermediate-step-715k-1.5T
3
- model_name : TinyLlama-1.5T-ja-wikipedia-step-10k
3
+ model_name : tinyllama-ja-wikipedia-1.5T-step-4k
4
4
output_base_dir : /data/output
5
5
dataset_id : wikimedia/wikipedia
6
6
dataset_load_config : 20231101.ja
@@ -14,7 +14,7 @@ train_claim_gpu_num: 4
14
14
train_per_device_train_batch_size : 8
15
15
train_gradient_accumulation_steps : 4
16
16
train_num_train_epochs : 4
17
- train_max_steps : 10000
17
+ train_max_steps : 4000
18
18
train_fp16 : True
19
19
inference_max_new_tokens : 32
20
20
evaluations :
Original file line number Diff line number Diff line change 3
3
import yaml
4
4
from datasets .load import load_dataset
5
5
6
- load_dataset ("oscar" )
7
- load_dataset ("cc100" )
6
+ # load_dataset("oscar")
7
+ load_dataset ("cc100" , "en" )
8
+ load_dataset ("cc100" , "ja" )
8
9
load_dataset ("cerebras/SlimPajama-627B" )
9
10
load_dataset ("bigcode/starcoderdata" )
10
11
load_dataset ("Open-Orca/OpenOrca" )
You can’t perform that action at this time.
0 commit comments