Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion weights_conversion/hf_to_megatron.py
Original file line number Diff line number Diff line change
Expand Up @@ -358,6 +358,8 @@ def main(model_name: str = "falcon", size: int = 7, out: Optional[Path] = None,
args.update({"num_attention_heads_kv": 8})
if size < 34 and not re.match(r"CodeLlama-\d+b-Python", cache_dir):
args.update({"padded_vocab_size": 32016})
if size == 70: # The vocab size of the three versions of codellama-70b is 32016
args.update({"padded_vocab_size": 32016})
else:
sys.exit(f"Model name has to be llama, llama2 or codellama, not {model_name}.")

Expand Down Expand Up @@ -440,7 +442,7 @@ def main(model_name: str = "falcon", size: int = 7, out: Optional[Path] = None,
elif args.model == "llama":
assert args.size in {7, 13, 30, 65}
elif args.model == "codellama":
assert args.size in {7, 13, 34}
assert args.size in {7, 13, 34, 70}
elif args.model == "mistral":
assert args.size in {7}
else:
Expand Down