File tree: 1 file changed (+3 -6 lines changed)
1 file changed (+3 -6 lines changed)
Original file line number | Diff line number | Diff line change
@@ -113,13 +113,10 @@ def prepare_train_data(dataset_id):
113
113
if "dataset_load_config" in train_config :
114
114
dataset_load_config = train_config ["dataset_load_config" ]
115
115
data = load_dataset (dataset_id , dataset_load_config , split = "train" , num_proc = 32 )
116
- if (
117
- dataset_load_config == "20231101.ja"
118
- or dataset_load_config == "20231101.vi"
119
- or dataset_load_config == "20231101.es"
120
- or dataset_load_config == "20231101.de"
121
- ):
116
+ if dataset_load_config == "20231101.ja" or dataset_load_config == "20231101.vi" or dataset_load_config == "20231101.es" :
122
117
data = data .filter (lambda item , idx : idx % 3 == 0 , with_indices = True )
118
+ if dataset_load_config == "20231101.de" :
119
+ data = data .filter (lambda item , idx : idx % 5 == 0 , with_indices = True )
123
120
else :
124
121
data = load_dataset (dataset_id , split = "train" , num_proc = 32 )
125
122
You can’t perform that action at this time.
0 commit comments