@@ -2218,12 +2218,60 @@ def unsloth_convert_lora_to_ggml_and_save_locally(
 
 
 from .models.loader_utils import get_model_name
-from unsloth_zoo.saving_utils import merge_and_overwrite_lora
+from unsloth_zoo.saving_utils import (
+    merge_and_overwrite_lora,
+    prepare_saving,
+)
 from unsloth_zoo.llama_cpp import (
     install_llama_cpp,
-    convert_to_gguf,
+    convert_to_gguf as _convert_to_gguf,
 )
 
+@torch.inference_mode
+def save_to_gguf_generic(
+    model,
+    save_directory,
+    quantization_type = "Q8_0",
+    repo_id = None,
+    token = None,
+):
+    if token is None and repo_id is not None: token = get_token()
+    if repo_id is not None and token is None:
+        raise RuntimeError("Unsloth: Please specify a token for uploading!")
+
+    if not os.path.exists(os.path.join("llama.cpp", "unsloth_convert_hf_to_gguf.py")):
+        install_llama_cpp(just_clone_repo = True)
+    pass
+
+    metadata = _convert_to_gguf(
+        save_directory,
+        print_output = True,
+        quantization_type = quantization_type,
+    )
+    if repo_id is not None:
+        prepare_saving(
+            model,
+            repo_id,
+            push_to_hub = True,
+            max_shard_size = "50GB",
+            private = True,
+            token = token,
+        )
+
+        from huggingface_hub import HfApi
+        api = HfApi(token = token)
+        api.upload_folder(
+            folder_path = save_directory,
+            repo_id = repo_id,
+            repo_type = "model",
+            allow_patterns = ["*.gguf"],
+            # Repo privacy is already set by prepare_saving above; upload_folder takes no `private` argument.
+        )
+    pass
+    return metadata
+pass
+
+
 @torch.inference_mode
 def unsloth_generic_save(
     model,
@@ -2467,8 +2515,8 @@ def patch_saving_functions(model, vision = False):
         # Vision only 1 option
         model.push_to_hub_merged = types.MethodType(unsloth_generic_push_to_hub_merged, model)
         model.save_pretrained_merged = types.MethodType(unsloth_generic_save_pretrained_merged, model)
-        model.push_to_hub_gguf = types.MethodType(not_implemented_save, model)
-        model.save_pretrained_gguf = types.MethodType(not_implemented_save, model)
+        model.push_to_hub_gguf = types.MethodType(save_to_gguf_generic, model)
+        model.save_pretrained_gguf = types.MethodType(save_to_gguf_generic, model)
     pass
     return model
 pass
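
For context, a minimal usage sketch of the newly patched methods (not part of the diff). It assumes patch_saving_functions has run with vision = True, that the directory passed in already holds the merged Hugging Face checkpoint, and that the model name, repo id, and token below are placeholders.

from unsloth import FastVisionModel

# Placeholder model; any Unsloth vision model patched by patch_saving_functions(vision = True) applies.
model, tokenizer = FastVisionModel.from_pretrained("unsloth/Llama-3.2-11B-Vision-Instruct")

# ... fine-tune, then save the merged 16-bit checkpoint locally first ...
model.save_pretrained_merged("merged_model", tokenizer)

# Local GGUF conversion only (no repo_id, so nothing is uploaded).
model.save_pretrained_gguf("merged_model", quantization_type = "Q8_0")

# Convert and push the resulting *.gguf files to a private Hub repo.
model.push_to_hub_gguf(
    "merged_model",
    quantization_type = "Q8_0",
    repo_id = "your-username/model-GGUF",  # placeholder repo
    token = "hf_...",                      # placeholder token
)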