Skip to content

Commit c980304

Browse files
committed
tweak
1 parent 2279b6b commit c980304

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

src/train/tokenizer.py

+2 -2
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
-# https://zenn.dev/if001/articles/87bbe893411fa1
+# 参考:
+# - https://zenn.dev/if001/articles/87bbe893411fa1
 from datasets.arrow_dataset import Dataset
 from datasets.load import load_dataset
 from tokenizers import Tokenizer, decoders, models, normalizers, pre_tokenizers, trainers
@@ -40,7 +41,6 @@ def ds_yielder():
     else:
         ds = raw_dataset
     print("ds", ds)
-    # ds = ds.select(range(0, 100))
     if "aya" in dataset_id:
         for v in ds["inputs"]:
             yield v

0 commit comments

Comments
 (0)