Skip to content

Commit 3bf0ed4

Browse files
committed
Update src/train.py
1 parent 5984454 commit 3bf0ed4

File tree

1 file changed

+3
-6
lines changed

1 file changed

+3
-6
lines changed

src/train.py

+3 -6
Original file line number | Diff line number | Diff line change
@@ -113,13 +113,10 @@ def prepare_train_data(dataset_id):
113113
if "dataset_load_config" in train_config:
114114
dataset_load_config = train_config["dataset_load_config"]
115115
data = load_dataset(dataset_id, dataset_load_config, split="train", num_proc=32)
116-
if (
117-
dataset_load_config == "20231101.ja"
118-
or dataset_load_config == "20231101.vi"
119-
or dataset_load_config == "20231101.es"
120-
or dataset_load_config == "20231101.de"
121-
):
116+
if dataset_load_config == "20231101.ja" or dataset_load_config == "20231101.vi" or dataset_load_config == "20231101.es":
122117
data = data.filter(lambda item, idx: idx % 3 == 0, with_indices=True)
118+
if dataset_load_config == "20231101.de":
119+
data = data.filter(lambda item, idx: idx % 5 == 0, with_indices=True)
123120
else:
124121
data = load_dataset(dataset_id, split="train", num_proc=32)
125122

0 commit comments

Comments
 (0)