@@ -84,13 +84,15 @@ def _load_tokenizer_from_path(self, tokenizer_path: str) -> Tokenizer:
         if not os.path.exists(tokenizer_path):
             if "assets/tokenizer" in tokenizer_path:
                 raise FileNotFoundError(
-                    "Detected deprecated ./assets/tokenizer path. Remove --model.tokenizer_path "
-                    "and download to --model.hf_assets_path using ./scripts/download_hf_assets.py"
+                    "Detected ./assets/tokenizer path which was deprecated in https://github.com/pytorch/torchtitan/pull/1540.\n"
+                    "Remove --model.tokenizer_path and download to --model.hf_assets_path using ./scripts/download_hf_assets.py\n"
+                    "See example: https://github.com/pytorch/torchtitan/tree/main/torchtitan/models/deepseek_v3#download-tokenizer"
                 )
             else:
                 raise FileNotFoundError(
                     f"Tokenizer path '{tokenizer_path}' does not exist"
                 )
+
         # Define paths for different tokenizer file types
         tokenizer_json_path = os.path.join(tokenizer_path, "tokenizer.json")
         vocab_txt_path = os.path.join(tokenizer_path, "vocab.txt")
@@ -165,17 +167,11 @@ def _load_tokenizer_from_path(self, tokenizer_path: str) -> Tokenizer:
                 for f in os.listdir(tokenizer_path)
                 if os.path.isfile(os.path.join(tokenizer_path, f))
             ]
-            if "assets/tokenizer" in tokenizer_path:
-                raise FileNotFoundError(
-                    "Detected deprecated ./assets/tokenizer path. Remove --model.tokenizer_path "
-                    "and download to --model.hf_assets_path using ./scripts/download_hf_assets.py"
-                )
-            else:
-                raise FileNotFoundError(
-                    f"No supported tokenizer files found in '{tokenizer_path}'. "
-                    f"Available files: {available_files}. "
-                    "Looking for: tokenizer.json, vocab.txt+merges.txt, or vocab.json+merges.txt"
-                )
+            raise FileNotFoundError(
+                f"No supported tokenizer files found in '{tokenizer_path}'. "
+                f"Available files: {available_files}. "
+                "Looking for: tokenizer.json, vocab.txt+merges.txt, or vocab.json+merges.txt"
+            )

     def _get_token_from_config(self, config: dict[str, Any], key: str) -> Optional[str]:
         """