We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent ce93914, commit 2279b6b. Copy full SHA for 2279b6b
src/dataset/prepare.py
@@ -4,6 +4,7 @@
4
5
from litgpt import HFTokenizer
6
from litgpt.data.prepare_starcoder import DataChunkRecipe
7
+from litdata.processing.data_processor import DataProcessor
8
9
from datasets.load import load_dataset
10
@@ -67,7 +68,6 @@ def prepare_for_dataset(
67
68
chunk_size: int,
69
) -> None:
70
destination_path.mkdir(parents=True, exist_ok=True)
- from litdata.processing.data_processor import DataProcessor
71
72
tokenizer = HFTokenizer(tokenizer_path)
73
data_recipe = YuisekinAIDataRecipe(tokenizer=tokenizer, chunk_size=chunk_size)
0 commit comments