Skip to content

Commit cdf94a1

Browse files
ochafik and ngxson authored
server: --offline mode (#13804)
* server: --offline mode (env: LLAMA_OFFLINE)

---------

Co-authored-by: Xuan-Son Nguyen <[email protected]>
1 parent a26c4cc commit cdf94a1

File tree

2 files changed

+114
-93
lines changed

2 files changed

+114
-93
lines changed

common/arg.cpp

Lines changed: 113 additions & 93 deletions
Original file line number | Diff line number | Diff line change
@@ -242,33 +242,7 @@ static bool curl_perform_with_retry(const std::string & url, CURL * curl, int ma
242242
}
243243

244244
// download one single file from remote URL to local path
245-
static bool common_download_file_single(const std::string & url, const std::string & path, const std::string & bearer_token) {
246-
// Initialize libcurl
247-
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
248-
curl_slist_ptr http_headers;
249-
if (!curl) {
250-
LOG_ERR("%s: error initializing libcurl\n", __func__);
251-
return false;
252-
}
253-
254-
// Set the URL, allow to follow http redirection
255-
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
256-
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
257-
258-
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
259-
// Check if hf-token or bearer-token was specified
260-
if (!bearer_token.empty()) {
261-
std::string auth_header = "Authorization: Bearer " + bearer_token;
262-
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
263-
}
264-
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
265-
266-
#if defined(_WIN32)
267-
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
268-
// operating system. Currently implemented under MS-Windows.
269-
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
270-
#endif
271-
245+
static bool common_download_file_single(const std::string & url, const std::string & path, const std::string & bearer_token, bool offline) {
272246
// Check if the file already exists locally
273247
auto file_exists = std::filesystem::exists(path);
274248

@@ -279,6 +253,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
279253
std::string last_modified;
280254

281255
if (file_exists) {
256+
if (offline) {
257+
LOG_INF("%s: using cached file (offline mode): %s\n", __func__, path.c_str());
258+
return true; // skip verification/downloading
259+
}
282260
// Try and read the JSON metadata file (note: stream autoclosed upon exiting this block).
283261
std::ifstream metadata_in(metadata_path);
284262
if (metadata_in.good()) {
@@ -297,6 +275,10 @@ static bool common_download_file_single(const std::string & url, const std::stri
297275
}
298276
// if we cannot open the metadata file, we assume that the downloaded file is not valid (etag and last-modified are left empty, so we will download it again)
299277
} else {
278+
if (offline) {
279+
LOG_ERR("%s: required file is not available in cache (offline mode): %s\n", __func__, path.c_str());
280+
return false;
281+
}
300282
LOG_INF("%s: no previous model file found %s\n", __func__, path.c_str());
301283
}
302284

@@ -310,50 +292,73 @@ static bool common_download_file_single(const std::string & url, const std::stri
310292
bool head_request_ok = false;
311293
bool should_download = !file_exists; // by default, we should download if the file does not exist
312294

313-
// get ETag to see if the remote file has changed
314-
{
315-
typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
316-
auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
317-
common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
295+
// Initialize libcurl
296+
curl_ptr curl(curl_easy_init(), &curl_easy_cleanup);
297+
curl_slist_ptr http_headers;
298+
if (!curl) {
299+
LOG_ERR("%s: error initializing libcurl\n", __func__);
300+
return false;
301+
}
302+
303+
// Set the URL, allow to follow http redirection
304+
curl_easy_setopt(curl.get(), CURLOPT_URL, url.c_str());
305+
curl_easy_setopt(curl.get(), CURLOPT_FOLLOWLOCATION, 1L);
318306

319-
static std::regex header_regex("([^:]+): (.*)\r\n");
320-
static std::regex etag_regex("ETag", std::regex_constants::icase);
321-
static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
307+
http_headers.ptr = curl_slist_append(http_headers.ptr, "User-Agent: llama-cpp");
308+
// Check if hf-token or bearer-token was specified
309+
if (!bearer_token.empty()) {
310+
std::string auth_header = "Authorization: Bearer " + bearer_token;
311+
http_headers.ptr = curl_slist_append(http_headers.ptr, auth_header.c_str());
312+
}
313+
curl_easy_setopt(curl.get(), CURLOPT_HTTPHEADER, http_headers.ptr);
322314

323-
std::string header(buffer, n_items);
324-
std::smatch match;
325-
if (std::regex_match(header, match, header_regex)) {
326-
const std::string & key = match[1];
327-
const std::string & value = match[2];
328-
if (std::regex_match(key, match, etag_regex)) {
329-
headers->etag = value;
330-
} else if (std::regex_match(key, match, last_modified_regex)) {
331-
headers->last_modified = value;
332-
}
333-
}
334-
return n_items;
335-
};
315+
#if defined(_WIN32)
316+
// CURLSSLOPT_NATIVE_CA tells libcurl to use standard certificate store of
317+
// operating system. Currently implemented under MS-Windows.
318+
curl_easy_setopt(curl.get(), CURLOPT_SSL_OPTIONS, CURLSSLOPT_NATIVE_CA);
319+
#endif
336320

337-
curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
338-
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
339-
curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
340-
curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
321+
typedef size_t(*CURLOPT_HEADERFUNCTION_PTR)(char *, size_t, size_t, void *);
322+
auto header_callback = [](char * buffer, size_t /*size*/, size_t n_items, void * userdata) -> size_t {
323+
common_load_model_from_url_headers * headers = (common_load_model_from_url_headers *) userdata;
341324

342-
// we only allow retrying once for HEAD requests
343-
// this is for the use case of using running offline (no internet), retrying can be annoying
344-
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0, "HEAD");
345-
if (!was_perform_successful) {
346-
head_request_ok = false;
347-
}
325+
static std::regex header_regex("([^:]+): (.*)\r\n");
326+
static std::regex etag_regex("ETag", std::regex_constants::icase);
327+
static std::regex last_modified_regex("Last-Modified", std::regex_constants::icase);
348328

349-
long http_code = 0;
350-
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
351-
if (http_code == 200) {
352-
head_request_ok = true;
353-
} else {
354-
LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
355-
head_request_ok = false;
329+
std::string header(buffer, n_items);
330+
std::smatch match;
331+
if (std::regex_match(header, match, header_regex)) {
332+
const std::string & key = match[1];
333+
const std::string & value = match[2];
334+
if (std::regex_match(key, match, etag_regex)) {
335+
headers->etag = value;
336+
} else if (std::regex_match(key, match, last_modified_regex)) {
337+
headers->last_modified = value;
338+
}
356339
}
340+
return n_items;
341+
};
342+
343+
curl_easy_setopt(curl.get(), CURLOPT_NOBODY, 1L); // will trigger the HEAD verb
344+
curl_easy_setopt(curl.get(), CURLOPT_NOPROGRESS, 1L); // hide head request progress
345+
curl_easy_setopt(curl.get(), CURLOPT_HEADERFUNCTION, static_cast<CURLOPT_HEADERFUNCTION_PTR>(header_callback));
346+
curl_easy_setopt(curl.get(), CURLOPT_HEADERDATA, &headers);
347+
348+
// we only allow retrying once for HEAD requests
349+
// this is for the use case of using running offline (no internet), retrying can be annoying
350+
bool was_perform_successful = curl_perform_with_retry(url, curl.get(), 1, 0, "HEAD");
351+
if (!was_perform_successful) {
352+
head_request_ok = false;
353+
}
354+
355+
long http_code = 0;
356+
curl_easy_getinfo(curl.get(), CURLINFO_RESPONSE_CODE, &http_code);
357+
if (http_code == 200) {
358+
head_request_ok = true;
359+
} else {
360+
LOG_WRN("%s: HEAD invalid http status code received: %ld\n", __func__, http_code);
361+
head_request_ok = false;
357362
}
358363

359364
// if head_request_ok is false, we don't have the etag or last-modified headers
@@ -460,12 +465,12 @@ static bool common_download_file_single(const std::string & url, const std::stri
460465

461466
// download multiple files from remote URLs to local paths
462467
// the input is a vector of pairs <url, path>
463-
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token) {
468+
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> & urls, const std::string & bearer_token, bool offline) {
464469
// Prepare download in parallel
465470
std::vector<std::future<bool>> futures_download;
466471
for (auto const & item : urls) {
467-
futures_download.push_back(std::async(std::launch::async, [bearer_token](const std::pair<std::string, std::string> & it) -> bool {
468-
return common_download_file_single(it.first, it.second, bearer_token);
472+
futures_download.push_back(std::async(std::launch::async, [bearer_token, offline](const std::pair<std::string, std::string> & it) -> bool {
473+
return common_download_file_single(it.first, it.second, bearer_token, offline);
469474
}, item));
470475
}
471476

@@ -481,14 +486,15 @@ static bool common_download_file_multiple(const std::vector<std::pair<std::strin
481486

482487
static bool common_download_model(
483488
const common_params_model & model,
484-
const std::string & bearer_token) {
489+
const std::string & bearer_token,
490+
bool offline) {
485491
// Basic validation of the model.url
486492
if (model.url.empty()) {
487493
LOG_ERR("%s: invalid model url\n", __func__);
488494
return false;
489495
}
490496

491-
if (!common_download_file_single(model.url, model.path, bearer_token)) {
497+
if (!common_download_file_single(model.url, model.path, bearer_token, offline)) {
492498
return false;
493499
}
494500

@@ -547,7 +553,7 @@ static bool common_download_model(
547553
}
548554

549555
// Download in parallel
550-
common_download_file_multiple(urls, bearer_token);
556+
common_download_file_multiple(urls, bearer_token, offline);
551557
}
552558

553559
return true;
@@ -608,7 +614,7 @@ std::pair<long, std::vector<char>> common_remote_get_content(const std::string &
608614
*
609615
* Note: we use the Ollama-compatible HF API, but not using the blobId. Instead, we use the special "ggufFile" field which returns the value for "hf_file". This is done to be backward-compatible with existing cache files.
610616
*/
611-
static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token) {
617+
static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_with_tag, const std::string & bearer_token, bool offline) {
612618
auto parts = string_split<std::string>(hf_repo_with_tag, ':');
613619
std::string tag = parts.size() > 1 ? parts.back() : "latest";
614620
std::string hf_repo = parts[0];
@@ -638,20 +644,25 @@ static struct common_hf_file_res common_get_hf_file(const std::string & hf_repo_
638644
long res_code = 0;
639645
std::string res_str;
640646
bool use_cache = false;
641-
try {
642-
auto res = common_remote_get_content(url, params);
643-
res_code = res.first;
644-
res_str = std::string(res.second.data(), res.second.size());
645-
} catch (const std::exception & e) {
646-
LOG_WRN("error: failed to get manifest: %s\n", e.what());
647-
LOG_WRN("try reading from cache\n");
648-
// try to read from cache
647+
if (!offline) {
649648
try {
649+
auto res = common_remote_get_content(url, params);
650+
res_code = res.first;
651+
res_str = std::string(res.second.data(), res.second.size());
652+
} catch (const std::exception & e) {
653+
LOG_WRN("error: failed to get manifest at %s: %s\n", url.c_str(), e.what());
654+
}
655+
}
656+
if (res_code == 0) {
657+
if (std::filesystem::exists(cached_response_path)) {
658+
LOG_WRN("trying to read manifest from cache: %s\n", cached_response_path.c_str());
650659
res_str = read_file(cached_response_path);
651660
res_code = 200;
652661
use_cache = true;
653-
} catch (const std::exception & e) {
654-
throw std::runtime_error("error: failed to get manifest (check your internet connection)");
662+
} else {
663+
throw std::runtime_error(
664+
offline ? "error: failed to get manifest (offline mode)"
665+
: "error: failed to get manifest (check your internet connection)");
655666
}
656667
}
657668
std::string ggufFile;
@@ -698,24 +709,25 @@ bool common_has_curl() {
698709
return false;
699710
}
700711

701-
static bool common_download_file_single(const std::string &, const std::string &, const std::string &) {
712+
static bool common_download_file_single(const std::string &, const std::string &, const std::string &, bool) {
702713
LOG_ERR("error: built without CURL, cannot download model from internet\n");
703714
return false;
704715
}
705716

706-
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> &, const std::string &) {
717+
static bool common_download_file_multiple(const std::vector<std::pair<std::string, std::string>> &, const std::string &, bool) {
707718
LOG_ERR("error: built without CURL, cannot download model from the internet\n");
708719
return false;
709720
}
710721

711722
static bool common_download_model(
712723
const common_params_model &,
713-
const std::string &) {
724+
const std::string &,
725+
bool) {
714726
LOG_ERR("error: built without CURL, cannot download model from the internet\n");
715727
return false;
716728
}
717729

718-
static struct common_hf_file_res common_get_hf_file(const std::string &, const std::string &) {
730+
static struct common_hf_file_res common_get_hf_file(const std::string &, const std::string &, bool) {
719731
LOG_ERR("error: built without CURL, cannot download model from the internet\n");
720732
return {};
721733
}
@@ -742,15 +754,16 @@ struct handle_model_result {
742754
static handle_model_result common_params_handle_model(
743755
struct common_params_model & model,
744756
const std::string & bearer_token,
745-
const std::string & model_path_default) {
757+
const std::string & model_path_default,
758+
bool offline) {
746759
handle_model_result result;
747760
// handle pre-fill default model path and url based on hf_repo and hf_file
748761
{
749762
if (!model.hf_repo.empty()) {
750763
// short-hand to avoid specifying --hf-file -> default it to --model
751764
if (model.hf_file.empty()) {
752765
if (model.path.empty()) {
753-
auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token);
766+
auto auto_detected = common_get_hf_file(model.hf_repo, bearer_token, offline);
754767
if (auto_detected.repo.empty() || auto_detected.ggufFile.empty()) {
755768
exit(1); // built without CURL, error message already printed
756769
}
@@ -791,7 +804,7 @@ static handle_model_result common_params_handle_model(
791804

792805
// then, download it if needed
793806
if (!model.url.empty()) {
794-
bool ok = common_download_model(model, bearer_token);
807+
bool ok = common_download_model(model, bearer_token, offline);
795808
if (!ok) {
796809
LOG_ERR("error: failed to download model from %s\n", model.url.c_str());
797810
exit(1);
@@ -934,7 +947,7 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
934947

935948
// handle model and download
936949
{
937-
auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH);
950+
auto res = common_params_handle_model(params.model, params.hf_token, DEFAULT_MODEL_PATH, params.offline);
938951
if (params.no_mmproj) {
939952
params.mmproj = {};
940953
} else if (res.found_mmproj && params.mmproj.path.empty() && params.mmproj.url.empty()) {
@@ -944,12 +957,12 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
944957
// only download mmproj if the current example is using it
945958
for (auto & ex : mmproj_examples) {
946959
if (ctx_arg.ex == ex) {
947-
common_params_handle_model(params.mmproj, params.hf_token, "");
960+
common_params_handle_model(params.mmproj, params.hf_token, "", params.offline);
948961
break;
949962
}
950963
}
951-
common_params_handle_model(params.speculative.model, params.hf_token, "");
952-
common_params_handle_model(params.vocoder.model, params.hf_token, "");
964+
common_params_handle_model(params.speculative.model, params.hf_token, "", params.offline);
965+
common_params_handle_model(params.vocoder.model, params.hf_token, "", params.offline);
953966
}
954967

955968
if (params.escape) {
@@ -2996,6 +3009,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
29963009
common_log_set_verbosity_thold(INT_MAX);
29973010
}
29983011
));
3012+
add_opt(common_arg(
3013+
{"--offline"},
3014+
"Offline mode: forces use of cache, prevents network access",
3015+
[](common_params & params) {
3016+
params.offline = true;
3017+
}
3018+
).set_env("LLAMA_OFFLINE"));
29993019
add_opt(common_arg(
30003020
{"-lv", "--verbosity", "--log-verbosity"}, "N",
30013021
"Set the verbosity threshold. Messages with a higher verbosity will be ignored.",

common/common.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -291,6 +291,7 @@ struct common_params {
291291
int32_t verbosity = 0;
292292
int32_t control_vector_layer_start = -1; // layer range for control vector
293293
int32_t control_vector_layer_end = -1; // layer range for control vector
294+
bool offline = false;
294295

295296
int32_t ppl_stride = 0; // stride for perplexity calculations. If left at 0, the pre-existing approach will be used.
296297
int32_t ppl_output_type = 0; // = 0 -> ppl output is as usual, = 1 -> ppl output is num_tokens, ppl, one per line

0 commit comments

Comments
 (0)