Skip to content

Commit

Permalink
rpc: fix register position (#11424)
Browse files Browse the repository at this point in the history
Signed-off-by: thxCode <[email protected]>
  • Loading branch information
thxCode authored Jan 26, 2025
1 parent 2cc9b8c commit 1d8ee06
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 1 deletion.
2 changes: 2 additions & 0 deletions src/llama-model.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1303,10 +1303,12 @@ bool llama_model::load_tensors(llama_model_loader & ml) {
const int act_gpu_layers = devices.empty() ? 0 : std::min(n_gpu_layers, (int)n_layer + 1);
auto get_layer_buft_list = [&](int il) -> llama_model::impl::layer_dev {
if (il < i_gpu_start || (il - i_gpu_start) >= act_gpu_layers) {
LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(cpu_dev));
return {cpu_dev, &pimpl->cpu_buft_list};
}
const int layer_gpu = std::upper_bound(splits.begin(), splits.begin() + n_devices(), float(il - i_gpu_start)/act_gpu_layers) - splits.begin();
auto * dev = devices.at(layer_gpu);
LLAMA_LOG_DEBUG("load_tensors: layer %3d assigned to device %s\n", il, ggml_backend_dev_name(dev));
return {dev, &pimpl->gpu_buft_list.at(dev)};
};

Expand Down
12 changes: 11 additions & 1 deletion src/llama.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -9405,6 +9405,7 @@ static struct llama_model * llama_model_load_from_file_impl(
model->devices.push_back(*dev);
}
} else {
std::vector<ggml_backend_dev_t> rpc_servers;
// use all available devices
for (size_t i = 0; i < ggml_backend_dev_count(); ++i) {
ggml_backend_dev_t dev = ggml_backend_dev_get(i);
Expand All @@ -9415,10 +9416,19 @@ static struct llama_model * llama_model_load_from_file_impl(
break;

case GGML_BACKEND_DEVICE_TYPE_GPU:
model->devices.push_back(dev);
ggml_backend_reg_t reg = ggml_backend_dev_backend_reg(dev);
if (ggml_backend_reg_name(reg) == std::string("RPC")) {
rpc_servers.push_back(dev);
} else {
model->devices.push_back(dev);
}
break;
}
}
// add RPC servers at the front of the list
if (!rpc_servers.empty()) {
model->devices.insert(model->devices.begin(), rpc_servers.begin(), rpc_servers.end());
}
}

// if using single GPU mode, remove all except the main GPU
Expand Down

0 comments on commit 1d8ee06

Please sign in to comment.