Skip to content

Commit acf93fa

Browse files
committed
Update src/dataset/load.py
1 parent d2932d6 commit acf93fa

File tree

1 file changed

+6
-6
lines changed

1 file changed

+6
-6
lines changed

src/dataset/load.py

+6 -6
Original file line number | Diff line number | Diff line change
@@ -26,16 +26,16 @@
26 26
load_dataset("Open-Orca/OpenOrca", trust_remote_code=True)
27 27

28 28
# デカい
29-
load_dataset("cc100", "en", trust_remote_code=True)
30-
load_dataset("cc100", "ja", trust_remote_code=True)
31-
load_dataset("allenai/c4", "en", trust_remote_code=True)
32-
load_dataset("allenai/c4", "ja", trust_remote_code=True)
29+
# load_dataset("cc100", "en", trust_remote_code=True)
30+
# load_dataset("cc100", "ja", trust_remote_code=True)
31+
# load_dataset("allenai/c4", "en", trust_remote_code=True)
32+
# load_dataset("allenai/c4", "ja", trust_remote_code=True)
33 33

34 34
# デカすぎる
35 35
# TinyLlamaが使ってる、895 GB
36-
# load_dataset("cerebras/SlimPajama-627B", trust_remote_code=True)
36+
load_dataset("cerebras/SlimPajama-627B", trust_remote_code=True)
37 37
# TinyLlamaが使ってる、311 GB
38-
# load_dataset("bigcode/starcoderdata", trust_remote_code=True)
38+
load_dataset("bigcode/starcoderdata", trust_remote_code=True)
39 39
# 886 GB
40 40
# load_dataset("EleutherAI/pile", "all", trust_remote_code=True)
41 41
# load_dataset("oscar")

0 commit comments

Comments (0)