@@ -242,33 +242,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
242
242
}
243
243
244
244
// download one single file from remote URL to local path
245
- static bool common_download_file_single (const std::string & url, const std::string & path, const std::string & bearer_token) {
246
- // Initialize libcurl
247
- curl_ptr curl (curl_easy_init (), &curl_easy_cleanup);
248
- curl_slist_ptr http_headers;
249
- if (!curl) {
250
- LOG_ERR (" %s: error initializing libcurl\n " , __func__);
251
- return false ;
252
- }
253
-
254
- // Set the URL, allow to follow http redirection
255
- curl_easy_setopt (curl.get (), CURLOPT_URL, url.c_str ());
256
- curl_easy_setopt (curl.get (), CURLOPT_FOLLOWLOCATION, 1L );
257
-
258
- http_headers.ptr = curl_slist_append (http_headers.ptr , " User-Agent: llama-cpp" );
259
- // Check if hf-token or bearer-token was specified
260
- if (!bearer_token.empty ()) {
261
- std::string auth_header = " Authorization: Bearer " + bearer_token;
262
- http_headers.ptr = curl_slist_append (http_headers.ptr , auth_header.c_str ());
263
- }
264
- curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers.ptr );
265
-
266
- #if defined(_WIN32)
267
- // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
268
- // operating system. Currently implemented under MS-Windows.
269
- curl_easy_setopt (curl.get (), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
270
- #endif
271
-
245
+ static bool common_download_file_single (const std::string & url, const std::string & path, const std::string & bearer_token, bool offline) {
272
246
// Check if the file already exists locally
273
247
auto file_exists = std::filesystem::exists (path);
274
248
@@ -279,6 +253,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
279
253
std::string last_modified;
280
254
281
255
if (file_exists) {
256
+ if (offline) {
257
+ LOG_INF (" %s: using cached file (offline mode): %s\n " , __func__, path.c_str ());
258
+ return true ; // skip verification/downloading
259
+ }
282
260
// Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
283
261
std::ifstream metadata_in (metadata_path);
284
262
if (metadata_in.good ()) {
@@ -297,6 +275,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
297
275
}
298
276
// if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
299
277
} else {
278
+ if (offline) {
279
+ LOG_ERR (" %s: required file is not available in cache (offline mode): %s\n " , __func__, path.c_str ());
280
+ return false ;
281
+ }
300
282
LOG_INF (" %s: no previous model file found %s\n " , __func__, path.c_str ());
301
283
}
302
284
@@ -310,50 +292,73 @@ static bool common_download_file_single(const std::string & url, const std::stri
310
292
bool head_request_ok = false ;
311
293
bool should_download = !file_exists; // by default, we should download if the file does not exist
312
294
313
- // get ETag to see if the remote file has changed
314
- {
315
- typedef size_t (*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t , size_t , void *);
316
- auto header_callback = [](char * buffer, size_t /* size*/ , size_t n_items, void * userdata) -> size_t {
317
- common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
295
+ // Initialize libcurl
296
+ curl_ptr curl (curl_easy_init (), &curl_easy_cleanup);
297
+ curl_slist_ptr http_headers;
298
+ if (!curl) {
299
+ LOG_ERR (" %s: error initializing libcurl\n " , __func__);
300
+ return false ;
301
+ }
302
+
303
+ // Set the URL, allow to follow http redirection
304
+ curl_easy_setopt (curl.get (), CURLOPT_URL, url.c_str ());
305
+ curl_easy_setopt (curl.get (), CURLOPT_FOLLOWLOCATION, 1L );
318
306
319
- static std::regex header_regex (" ([^:]+): (.*)\r\n " );
320
- static std::regex etag_regex (" ETag" , std::regex_constants::icase);
321
- static std::regex last_modified_regex (" Last-Modified" , std::regex_constants::icase);
307
+ http_headers.ptr = curl_slist_append (http_headers.ptr , " User-Agent: llama-cpp" );
308
+ // Check if hf-token or bearer-token was specified
309
+ if (!bearer_token.empty ()) {
310
+ std::string auth_header = " Authorization: Bearer " + bearer_token;
311
+ http_headers.ptr = curl_slist_append (http_headers.ptr , auth_header.c_str ());
312
+ }
313
+ curl_easy_setopt (curl.get (), CURLOPT_HTTPHEADER, http_headers.ptr );
322
314
323
- std::string header (buffer, n_items);
324
- std::smatch match;
325
- if (std::regex_match (header, match, header_regex)) {
326
- const std::string & key = match[1 ];
327
- const std::string & value = match[2 ];
328
- if (std::regex_match (key, match, etag_regex)) {
329
- headers->etag = value;
330
- } else if (std::regex_match (key, match, last_modified_regex)) {
331
- headers->last_modified = value;
332
- }
333
- }
334
- return n_items;
335
- };
315
+ #if defined(_WIN32)
316
+ // CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
317
+ // operating system. Currently implemented under MS-Windows.
318
+ curl_easy_setopt (curl.get (), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
319
+ #endif
336
320
337
- curl_easy_setopt (curl.get (), CURLOPT_NOBODY, 1L ); // will trigger the HEAD verb
338
- curl_easy_setopt (curl.get (), CURLOPT_NOPROGRESS, 1L ); // hide head request progress
339
- curl_easy_setopt (curl.get (), CURLOPT_HEADERFUNCTION, static_cast <CURLOPT_HEADERFUNCTION_PTR>(header_callback));
340
- curl_easy_setopt (curl.get (), CURLOPT_HEADERDATA, &headers);
321
+ typedef size_t (*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t , size_t , void *);
322
+ auto header_callback = [](char * buffer, size_t /* size*/ , size_t n_items, void * userdata) -> size_t {
323
+ common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
341
324
342
- // we only allow retrying once for HEAD requests
343
- // this is for the use case of using running offline (no internet), retrying can be annoying
344
- bool was_perform_successful = curl_perform_with_retry (url, curl.get (), 1 , 0 , " HEAD" );
345
- if (!was_perform_successful) {
346
- head_request_ok = false ;
347
- }
325
+ static std::regex header_regex (" ([^:]+): (.*)\r\n " );
326
+ static std::regex etag_regex (" ETag" , std::regex_constants::icase);
327
+ static std::regex last_modified_regex (" Last-Modified" , std::regex_constants::icase);
348
328
349
- long http_code = 0 ;
350
- curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &http_code);
351
- if (http_code == 200 ) {
352
- head_request_ok = true ;
353
- } else {
354
- LOG_WRN (" %s: HEAD invalid http status code received: %ld\n " , __func__, http_code);
355
- head_request_ok = false ;
329
+ std::string header (buffer, n_items);
330
+ std::smatch match;
331
+ if (std::regex_match (header, match, header_regex)) {
332
+ const std::string & key = match[1 ];
333
+ const std::string & value = match[2 ];
334
+ if (std::regex_match (key, match, etag_regex)) {
335
+ headers->etag = value;
336
+ } else if (std::regex_match (key, match, last_modified_regex)) {
337
+ headers->last_modified = value;
338
+ }
356
339
}
340
+ return n_items;
341
+ };
342
+
343
+ curl_easy_setopt (curl.get (), CURLOPT_NOBODY, 1L ); // will trigger the HEAD verb
344
+ curl_easy_setopt (curl.get (), CURLOPT_NOPROGRESS, 1L ); // hide head request progress
345
+ curl_easy_setopt (curl.get (), CURLOPT_HEADERFUNCTION, static_cast <CURLOPT_HEADERFUNCTION_PTR>(header_callback));
346
+ curl_easy_setopt (curl.get (), CURLOPT_HEADERDATA, &headers);
347
+
348
+ // we only allow retrying once for HEAD requests
349
+ // this is for the use case of using running offline (no internet), retrying can be annoying
350
+ bool was_perform_successful = curl_perform_with_retry (url, curl.get (), 1 , 0 , " HEAD" );
351
+ if (!was_perform_successful) {
352
+ head_request_ok = false ;
353
+ }
354
+
355
+ long http_code = 0 ;
356
+ curl_easy_getinfo (curl.get (), CURLINFO_RESPONSE_CODE, &http_code);
357
+ if (http_code == 200 ) {
358
+ head_request_ok = true ;
359
+ } else {
360
+ LOG_WRN (" %s: HEAD invalid http status code received: %ld\n " , __func__, http_code);
361
+ head_request_ok = false ;
357
362
}
358
363
359
364
// if head_request_ok is false, we don't have the etag or last-modified headers
@@ -460,12 +465,12 @@ static bool common_download_file_single(const std::string & url, const std::stri
460
465
461
466
// download multiple files from remote URLs to local paths
462
467
// the input is a vector of pairs <url, path>
463
- static bool common_download_file_multiple (const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token) {
468
+ static bool common_download_file_multiple (const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token, bool offline ) {
464
469
// Prepare download in parallel
465
470
std::vector<std::future<bool >> futures_download;
466
471
for (auto const & item : urls) {
467
- futures_download.push_back (std::async (std::launch::async, [bearer_token](const std::pair<std::string, std::string> & it) -> bool {
468
- return common_download_file_single (it.first , it.second , bearer_token);
472
+ futures_download.push_back (std::async (std::launch::async, [bearer_token, offline ](const std::pair<std::string, std::string> & it) -> bool {
473
+ return common_download_file_single (it.first , it.second , bearer_token, offline );
469
474
}, item));
470
475
}
471
476
@@ -481,14 +486,15 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
481
486
482
487
static bool common_download_model (
483
488
const common_params_model & model,
484
- const std::string & bearer_token) {
489
+ const std::string & bearer_token,
490
+ bool offline) {
485
491
// Basic validation of the model.url
486
492
if (model.url .empty ()) {
487
493
LOG_ERR (" %s: invalid model url\n " , __func__);
488
494
return false ;
489
495
}
490
496
491
- if (!common_download_file_single (model.url , model.path , bearer_token)) {
497
+ if (!common_download_file_single (model.url , model.path , bearer_token, offline )) {
492
498
return false ;
493
499
}
494
500
@@ -547,7 +553,7 @@ static bool common_download_model(
547
553
}
548
554
549
555
// Download in parallel
550
- common_download_file_multiple (urls, bearer_token);
556
+ common_download_file_multiple (urls, bearer_token, offline );
551
557
}
552
558
553
559
return true ;
@@ -608,7 +614,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
608
614
*
609
615
* Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
610
616
*/
611
- static struct common_hf_file_res common_get_hf_file (const std::string & hf_repo_with_tag, const std::string & bearer_token) {
617
+ static struct common_hf_file_res common_get_hf_file (const std::string & hf_repo_with_tag, const std::string & bearer_token, bool offline ) {
612
618
auto parts = string_split<std::string>(hf_repo_with_tag, ' :' );
613
619
std::string tag = parts.size () > 1 ? parts.back () : " latest" ;
614
620
std::string hf_repo = parts[0 ];
@@ -638,20 +644,25 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
638
644
long res_code = 0 ;
639
645
std::string res_str;
640
646
bool use_cache = false ;
641
- try {
642
- auto res = common_remote_get_content (url, params);
643
- res_code = res.first ;
644
- res_str = std::string (res.second .data (), res.second .size ());
645
- } catch (const std::exception & e) {
646
- LOG_WRN (" error: failed to get manifest: %s\n " , e.what ());
647
- LOG_WRN (" try reading from cache\n " );
648
- // try to read from cache
647
+ if (!offline) {
649
648
try {
649
+ auto res = common_remote_get_content (url, params);
650
+ res_code = res.first ;
651
+ res_str = std::string (res.second .data (), res.second .size ());
652
+ } catch (const std::exception & e) {
653
+ LOG_WRN (" error: failed to get manifest at %s: %s\n " , url.c_str (), e.what ());
654
+ }
655
+ }
656
+ if (res_code == 0 ) {
657
+ if (std::filesystem::exists (cached_response_path)) {
658
+ LOG_WRN (" trying to read manifest from cache: %s\n " , cached_response_path.c_str ());
650
659
res_str = read_file (cached_response_path);
651
660
res_code = 200 ;
652
661
use_cache = true ;
653
- } catch (const std::exception & e) {
654
- throw std::runtime_error (" error: failed to get manifest (check your internet connection)" );
662
+ } else {
663
+ throw std::runtime_error (
664
+ offline ? " error: failed to get manifest (offline mode)"
665
+ : " error: failed to get manifest (check your internet connection)" );
655
666
}
656
667
}
657
668
std::string ggufFile;
@@ -698,24 +709,25 @@ bool common_has_curl() {
698
709
return false ;
699
710
}
700
711
701
- static bool common_download_file_single (const std::string &, const std::string &, const std::string &) {
712
// Stub variant of common_download_file_single. Per its own log message it is the
// version used when the program is built without CURL.
// All four parameters are unnamed and ignored; in the CURL-enabled variant shown
// earlier in this diff they are (url, path, bearer_token, offline).
// Logs an error and always returns false, regardless of the offline flag.
+ static bool common_download_file_single (const std::string &, const std::string &, const std::string &, bool ) {
702
713
LOG_ERR (" error: built without CURL, cannot download model from internet\n " );
703
714
return false ;
704
715
}
705
716
706
- static bool common_download_file_multiple (const std::vector<std::pair<std::string, std::string>> &, const std::string &) {
717
// Stub variant of common_download_file_multiple. Per its own log message it is the
// version used when the program is built without CURL.
// The parameters are unnamed and ignored; in the CURL-enabled variant shown earlier
// in this diff they are (urls as <url, path> pairs, bearer_token, offline).
// Logs an error and always returns false; no download is attempted.
+ static bool common_download_file_multiple (const std::vector<std::pair<std::string, std::string>> &, const std::string &, bool ) {
707
718
LOG_ERR (" error: built without CURL, cannot download model from the internet\n " );
708
719
return false ;
709
720
}
710
721
711
722
// Stub variant of common_download_model. Per its own log message it is the version
// used when the program is built without CURL.
// The parameters are unnamed and ignored; in the CURL-enabled variant shown earlier
// in this diff they are (model, bearer_token, offline). This diff replaces the
// two-parameter signature with one that takes a trailing bool.
// Logs an error and always returns false.
static bool common_download_model (
711
723
const common_params_model &,
713
- const std::string &) {
724
+ const std::string &,
725
+ bool ) {
714
726
LOG_ERR (" error: built without CURL, cannot download model from the internet\n " );
715
727
return false ;
716
728
}
717
729
718
- static struct common_hf_file_res common_get_hf_file (const std::string &, const std::string &) {
730
// Stub variant of common_get_hf_file. Per its own log message it is the version used
// when the program is built without CURL.
// The parameters are unnamed and ignored; in the CURL-enabled variant shown earlier
// in this diff they are (hf_repo_with_tag, bearer_token, offline).
// Logs an error and returns a value-initialized common_hf_file_res.
+ static struct common_hf_file_res common_get_hf_file (const std::string &, const std::string &, bool ) {
719
731
LOG_ERR (" error: built without CURL, cannot download model from the internet\n " );
720
732
// The caller visible in this diff (common_params_handle_model) checks for an empty
// repo / ggufFile in the result and exits, noting the error was already printed.
return {};
721
733
}
@@ -742,15 +754,16 @@ struct handle_model_result {
742
754
static handle_model_result common_params_handle_model (
743
755
struct common_params_model & model,
744
756
const std::string & bearer_token,
745
- const std::string & model_path_default) {
757
+ const std::string & model_path_default,
758
+ bool offline) {
746
759
handle_model_result result;
747
760
// handle pre-fill default model path and url based on hf_repo and hf_file
748
761
{
749
762
if (!model.hf_repo .empty ()) {
750
763
// short-hand to avoid specifying --hf-file -> default it to --model
751
764
if (model.hf_file .empty ()) {
752
765
if (model.path .empty ()) {
753
- auto auto_detected = common_get_hf_file (model.hf_repo , bearer_token);
766
+ auto auto_detected = common_get_hf_file (model.hf_repo , bearer_token, offline );
754
767
if (auto_detected.repo .empty () || auto_detected.ggufFile .empty ()) {
755
768
exit (1 ); // built without CURL, error message already printed
756
769
}
@@ -791,7 +804,7 @@ static handle_model_result common_params_handle_model(
791
804
792
805
// then, download it if needed
793
806
if (!model.url .empty ()) {
794
- bool ok = common_download_model (model, bearer_token);
807
+ bool ok = common_download_model (model, bearer_token, offline );
795
808
if (!ok) {
796
809
LOG_ERR (" error: failed to download model from %s\n " , model.url .c_str ());
797
810
exit (1 );
@@ -934,7 +947,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
934
947
935
948
// handle model and download
936
949
{
937
- auto res = common_params_handle_model (params.model , params.hf_token , DEFAULT_MODEL_PATH);
950
+ auto res = common_params_handle_model (params.model , params.hf_token , DEFAULT_MODEL_PATH, params. offline );
938
951
if (params.no_mmproj ) {
939
952
params.mmproj = {};
940
953
} else if (res.found_mmproj && params.mmproj .path .empty () && params.mmproj .url .empty ()) {
@@ -944,12 +957,12 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
944
957
// only download mmproj if the current example is using it
945
958
for (auto & ex : mmproj_examples) {
946
959
if (ctx_arg.ex == ex) {
947
- common_params_handle_model (params.mmproj , params.hf_token , " " );
960
+ common_params_handle_model (params.mmproj , params.hf_token , " " , params. offline );
948
961
break ;
949
962
}
950
963
}
951
- common_params_handle_model (params.speculative .model , params.hf_token , " " );
952
- common_params_handle_model (params.vocoder .model , params.hf_token , " " );
964
+ common_params_handle_model (params.speculative .model , params.hf_token , " " , params. offline );
965
+ common_params_handle_model (params.vocoder .model , params.hf_token , " " , params. offline );
953
966
}
954
967
955
968
if (params.escape ) {
@@ -2996,6 +3009,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
2996
3009
common_log_set_verbosity_thold (INT_MAX);
2997
3010
}
2998
3011
));
3012
+ add_opt (common_arg (
3013
+ {" --offline" },
3014
+ " Offline mode: forces use of cache, prevents network access" ,
3015
+ [](common_params & params) {
3016
+ params.offline = true ;
3017
+ }
3018
+ ).set_env (" LLAMA_OFFLINE" ));
2999
3019
add_opt (common_arg (
3000
3020
{" -lv" , " --verbosity" , " --log-verbosity" }, " N" ,
3001
3021
" Set the verbosity threshold. Messages with a higher verbosity will be ignored." ,
0 commit comments