Skip to content

Commit 91b627b

Browse files
committed
Fix the reading of epctx blob using stream
1 parent 358f9e7 commit 91b627b

File tree

10 files changed

+389
-338
lines changed

10 files changed

+389
-338
lines changed

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,13 @@ BackendManager::BackendManager(SessionContext& session_context,
7676
ptr_stream_t model_stream;
7777
std::unique_ptr<onnx::ModelProto> model_proto;
7878
if (subgraph_context_.is_ep_ctx_graph) {
79-
model_stream = ep_ctx_handle_.GetModelBlobStream(session_context_.so_context_file_path, subgraph);
79+
std::cout << " inside is_ep_ctx_graph " << std::endl;
80+
std::string model_name = onnxruntime::openvino_ep::BackendManager::stripAfterFirstDot(session_context_.onnx_model_path_name.filename().string());
81+
auto subgraph_name = model_name + "_" + subgraph_context_.subgraph_name;
82+
model_stream = ep_ctx_handle_.GetModelBlobStream(shared_context_,
83+
session_context_.so_context_file_path,
84+
subgraph_name,
85+
subgraph);
8086
} else {
8187
model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger);
8288
}
@@ -96,7 +102,9 @@ BackendManager::BackendManager(SessionContext& session_context,
96102
if (!sw.mapped_weights) {
97103
sw.mapped_weights = std::make_unique<SharedContext::SharedWeights::WeightsFile>(weight_filename);
98104
}
105+
std::cout << " Call createOVTensors in backend_manager.cc" << std::endl;
99106
backend_utils::CreateOVTensors(session_context_.device_type, sw.metadata, *sw.mapped_weights);
107+
std::cout << " create OVTensors successful " << std::endl;
100108
}
101109
}
102110

@@ -197,6 +205,14 @@ BackendManager::BackendManager(SessionContext& session_context,
197205
}
198206
}
199207

208+
// Returns the part of `filename` that precedes its first '.' character.
// A name containing no '.' is returned unchanged; a name that starts with
// '.' yields an empty string.
std::string BackendManager::stripAfterFirstDot(std::string filename) {
  const auto first_dot = filename.find('.');
  if (first_dot != std::string::npos) {
    filename.erase(first_dot);  // Drop the dot and everything that follows it
  }
  return filename;
}
215+
200216
// Call EPContext model exporter here if the provider option for exporting
201217
// precompiled blob is set. If that's the case:
202218
// By default, create model in embed mode where the blob stream is exported as data within
@@ -210,27 +226,33 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie
210226
ORT_THROW(exception_str);
211227
}
212228

229+
std::cout << " inside export compiled model " << std::endl;
230+
213231
// If embed_mode, then pass on the serialized blob
214232
// If not embed_mode, dump the blob here and only pass on the path to the blob
215233
std::string model_blob_str;
216234
auto compiled_model = concrete_backend_->GetOVCompiledModel();
217-
if (session_context_.so_share_ep_contexts){
218-
// std::ostringstream model_blob_stream;
219-
// compiled_model.export_model(model_blob_stream);
235+
if (session_context_.so_share_ep_contexts) {
236+
std::ostringstream model_blob_stream;
237+
compiled_model.export_model(model_blob_stream);
238+
std::cout << " inside export compiled model - share ep contexts" << std::endl;
220239

221240
// std::ofstream file(metadata_filename, std::ios::app| std::ios::binary);
222241
// std::cout << " write to metadata bin - " << metadata_filename << std::endl;
223242
auto& subgraph_metadata = shared_context_.shared_weights.subgraph_metadata;
224-
225-
sw::SubgraphMetadata::Map::key_type key{subgraph_context_.subgraph_name};
243+
std::string model_name = onnxruntime::openvino_ep::BackendManager::stripAfterFirstDot(session_context_.onnx_model_path_name.filename().string());
244+
auto subgraph_name = model_name + "_" + subgraph_context_.subgraph_name;
245+
sw::SubgraphMetadata::Map::key_type key{subgraph_name};
226246
sw::SubgraphMetadata::Map::mapped_type value{};
227247

228248
auto& bin_file = shared_context_.shared_weights.shared_bin_file.bin_file_;
229-
if (bin_file.is_open()) {
249+
std::cout << " subgraph name " << subgraph_name << "key = " << key.name << " For bin write " << std::endl;
250+
if (!subgraph_metadata.contains(key) && bin_file.is_open()) {
230251
// std::cout << "Current offset before "<< subgraph_context_.subgraph_name << " = " << bin_file.tellp() << std::endl;
231252
value.epctx_offset = bin_file.tellp();
232-
// bin_file << model_blob_stream.str();
233-
compiled_model.export_model(bin_file);
253+
std::cout << " bin file location for writing subgraph = " << bin_file.tellp() << std::endl;
254+
bin_file << model_blob_stream.str();
255+
// compiled_model.export_model(bin_file);
234256
// std::cout << "Current offset after "<< subgraph_context_.subgraph_name << " = " << bin_file.tellp() << std::endl;
235257
value.epctx_length = static_cast<size_t>(static_cast<std::streamoff>(bin_file.tellp()) - value.epctx_offset);
236258
// std::cout << "Key = " << key.name << " Offset = " << value.epctx_offset << " , length = " << value.epctx_length << std::endl;

onnxruntime/core/providers/openvino/backend_manager.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ class BackendManager {
4747
ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_proto,
4848
const std::vector<std::vector<int64_t>>& input_shapes);
4949

50+
std::string stripAfterFirstDot(std::string filename);
51+
5052
std::unique_ptr<ONNX_NAMESPACE::ModelProto> model_proto_;
5153
std::shared_ptr<IBackend> concrete_backend_;
5254
std::map<std::string, std::shared_ptr<IBackend>> backend_map_;

onnxruntime/core/providers/openvino/backend_utils.cc

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -303,22 +303,33 @@ void CreateOVTensors(const std::string& device_name,
303303
SharedContext::SharedWeights::Metadata::Map& metadata_map,
304304
SharedContext::SharedWeights::WeightsFile& weights) {
305305
for (auto& [key, value] : metadata_map) {
306-
if (value.tensor) continue;
306+
// std::cout << " Key = " << key.name << std::endl;
307+
if (value.tensor) {
308+
// std::cout << " Value already present for key = " << key.name << std::endl;
309+
continue;
310+
}
307311

308312
// Get element data type
313+
// std::cout << " value element type = " << value.element_type << std::endl;
309314
auto onnx_element_type = (ONNX_NAMESPACE::TensorProto_DataType)value.element_type;
310315

311316
ov::element::Type ov_elementType = GetOpenVINOElementType(onnx_element_type); // Map to OpenVINO data type
312-
317+
// std::cout << "value dimensions = " << std::endl;
318+
// for (auto dim:value.dimensions){
319+
// std::cout << dim << std::endl;
320+
// }
313321
// Create OpenVINO Tensor
314322
if (device_name == "NPU") {
315323
// Use remote tensors
316324
auto npu_context = OVCore::Get()->core.get_default_context("NPU").as<ov::intel_npu::level_zero::ZeroContext>();
317325
auto&& remote_tensor = npu_context.create_l0_host_tensor(ov_elementType, value.dimensions, ov::intel_npu::TensorType::INPUT);
318-
326+
// std::cout << " Remote tensor created " << std::endl;
319327
// Copy data to remote tensor
328+
// std::cout << " value size = " << value.size << std::endl;
320329
weights.load_weights(value.data_offset, remote_tensor.get(), value.size);
321330
value.tensor = std::make_shared<ov::Tensor>(remote_tensor);
331+
// std::cout << " value tensor created " << std::endl;
332+
322333
} else {
323334
// Use vanilla tensors
324335
value.tensor = std::make_shared<ov::Tensor>(ov_elementType, value.dimensions);

onnxruntime/core/providers/openvino/backends/basic_backend.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
114114
ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
115115
}
116116
#endif
117+
std::cout << " loaded model to the plugin " << std::endl;
117118
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
118119
} catch (const char* msg) {
119120
ORT_THROW(msg);
@@ -125,11 +126,14 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
125126
if (session_context_.so_share_ep_contexts) {
126127
initializer = [&metadata](OVInferRequestPtr ir_ptr) {
127128
const auto input_count = ir_ptr->GetNumInputs();
129+
std::cout << " ov ir input count = " << input_count << std::endl;
128130
for (auto i = 0u; i < input_count; i++) {
129131
using Key = SharedContext::SharedWeights::Metadata::Key;
130132
const auto tensor_key = Key{ir_ptr->GetInputTensorName(i)};
131133
if (metadata.contains(tensor_key)) {
132134
auto& value = metadata.at(tensor_key);
135+
// ORT_ENFORCE(value.tensor->get_byte_size() == value.size, "Unexpected tensor size mismatch");
136+
std::cout << " value tensor is set with shape = " << value.tensor->get_byte_size() << " input size from metadata = " << value.size << std::endl;
133137
ir_ptr->SetTensor(tensor_key.name, value.tensor);
134138
}
135139
}

onnxruntime/core/providers/openvino/contexts.h

Lines changed: 18 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -25,15 +25,15 @@ class SharedContext : public WeakSingleton<SharedContext> {
2525
SharedContext() : OVCore_(OVCore::Get()) {}
2626
struct SharedWeights {
2727
struct Header {
28-
uint32_t bin_version=1;
29-
long footer_offset=0;
30-
}header_;
28+
uint32_t bin_version = 1;
29+
long footer_offset = 0;
30+
} header_;
3131
struct Footer {
3232
long subgraph_offset;
3333
size_t subgraph_length;
3434
long metadata_offset;
3535
size_t metadata_length;
36-
}footer_;
36+
} footer_;
3737

3838
struct Metadata {
3939
struct Key {
@@ -56,8 +56,6 @@ class SharedContext : public WeakSingleton<SharedContext> {
5656
using Map = std::unordered_map<Key, Value, Hash>;
5757
void writeMetadataToBinaryFile(SharedContext& shared_context, const Metadata::Map& metadata);
5858
void readMetadataFromBinaryFile(SharedContext& shared_context, Metadata::Map& metadata);
59-
// friend std::ostream& operator<<(std::ostream& right, const Metadata::Map& metadata);
60-
// friend std::istream& operator>>(std::istream& right, Metadata::Map& metadata);
6159
};
6260

6361
struct SubgraphMetadata {
@@ -78,9 +76,7 @@ class SharedContext : public WeakSingleton<SharedContext> {
7876
void writeSubgraphDataToBinaryFile(SharedContext& shared_context,
7977
const SubgraphMetadata::Map& subgraph_metadata);
8078
void readSubgraphDataFromBinaryFile(SharedContext& shared_context,
81-
SubgraphMetadata::Map& subgraph_metadata);
82-
// friend std::ostream& operator<<(std::ostream& right, const SubgraphMetadata::Map& subgraph_metadata);
83-
// friend std::istream& operator>>(std::istream& right, SubgraphMetadata::Map& subgraph_metadata);
79+
SubgraphMetadata::Map& subgraph_metadata);
8480
};
8581

8682
struct WeightsFile {
@@ -103,56 +99,49 @@ class SharedContext : public WeakSingleton<SharedContext> {
10399
SharedBinFile() = default; // Default constructor
104100
// Closes the shared bin file if it is still open when this object is destroyed.
~SharedBinFile() {
  if (!bin_file_.is_open()) {
    return;  // Nothing to close
  }
  bin_file_.close();
}
109105

110106
// Opens the shared bin file for binary read/write access, creating an empty
// file first when none exists, and records its current size in bin_size_.
// Does nothing to the stream when bin_file_ is already open.
//
// shared_bin_filename: path of the shared bin file to open.
// Throws std::runtime_error when the file cannot be created, lacks owner
// read or write permission, cannot be opened, or cannot be sized.
// NOTE(review): the parameter is not stored here; confirm that any member
// holding the bin file path is assigned by the caller.
void openBinFile(const fs::path shared_bin_filename) {
  // Opening with in | out does not create a missing file, so create it first.
  if (!fs::exists(shared_bin_filename)) {
    std::ofstream createFile(shared_bin_filename, std::ios::binary);  // Create an empty binary file
    if (!createFile) {
      throw std::runtime_error("Failed to create the file!");
    }
    createFile.close();
  }

  // Check if the file is accessible for reading and writing
  fs::perms file_perms = fs::status(shared_bin_filename).permissions();

  if ((file_perms & fs::perms::owner_read) == fs::perms::none ||
      (file_perms & fs::perms::owner_write) == fs::perms::none) {
    std::cerr << "Error: Insufficient permissions for file: " << shared_bin_filename << std::endl;
    throw std::runtime_error("Failed to open shared bin file! Insufficient permissions.");
  }

  if (!bin_file_.is_open()) {  // Prevent reopening
    bin_file_.open(shared_bin_filename, std::ios::in | std::ios::out | std::ios::binary);

    // Validate the open before probing the size: on a stream that failed to
    // open, tellg() reports failure (-1) and that value would otherwise be
    // stored in bin_size_ before the error is raised.
    if (!bin_file_) {
      throw std::runtime_error("Failed to open shared bin file!");
    }

    bin_size_ = bin_file_.seekg(0, std::ios::end).tellg();
    bin_file_.seekg(0, std::ios::beg);  // Reset to the beginning of the file

    if (!bin_file_) {
      throw std::runtime_error("Failed to determine the size of the shared bin file!");
    }
  }
}
147-
}shared_bin_file;
135+
void readBinFile(SharedContext& shared_context_);
136+
} shared_bin_file;
148137

149138
fs::path external_weight_filename;
150139
std::unique_ptr<WeightsFile> mapped_weights;
151140
Metadata metadata_;
152141
Metadata::Map metadata;
153142
SubgraphMetadata subgraph_metadata_;
154143
SubgraphMetadata::Map subgraph_metadata;
155-
}shared_weights;
144+
} shared_weights;
156145
};
157146

158147
using config_t = std::map<std::string, ov::AnyMap>;

onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,10 @@ Status EPCtxHandler::AddOVEPCtxNodeToGraph(const GraphViewer& graph_viewer,
9999
return Status::OK();
100100
}
101101

102-
std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const {
102+
std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(SharedContext& shared_context_,
103+
const std::filesystem::path& so_context_file_path,
104+
const std::string& subgraph_name,
105+
const GraphViewer& graph_viewer) const {
103106
auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin();
104107
auto node = graph_viewer.GetNode(first_index);
105108
ORT_ENFORCE(node != nullptr);
@@ -121,7 +124,40 @@ std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(const std::filesy
121124
}
122125
blob_filepath = blob_filepath.parent_path() / ep_cache_context;
123126
ORT_ENFORCE(std::filesystem::exists(blob_filepath), "Blob file not found: ", blob_filepath.string());
124-
result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in));
127+
std::cout << " blob_filepath " << blob_filepath.filename().string() << std::endl;
128+
std::cout << " shared bin filename = " << shared_context_.shared_weights.shared_bin_file.shared_bin_filename.filename().string() << std::endl;
129+
if (blob_filepath == shared_context_.shared_weights.shared_bin_file.shared_bin_filename) {
130+
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from Shared bin file - " << blob_filepath;
131+
auto& sb = shared_context_.shared_weights.shared_bin_file;
132+
// check if size of bin file is greater than the header as it gets written at the beginning
133+
ORT_ENFORCE(sb.bin_size_ > 8, " Bin file is empty. Regenerate the epctx model. Bin file path : ", blob_filepath.string());
134+
auto subgraph_metadata = shared_context_.shared_weights.subgraph_metadata;
135+
using Key = SharedContext::SharedWeights::SubgraphMetadata::Key;
136+
std::cout << " subgraph name = " << subgraph_name << std::endl;
137+
const auto subgraph_key = Key{subgraph_name};
138+
auto it = subgraph_metadata.find(subgraph_key);
139+
if (it != subgraph_metadata.end()) {
140+
auto& value = it->second;
141+
std::cout << " value.epctx_offset = " << value.epctx_offset << std::endl;
142+
std::cout << " value.epctx_length = " << value.epctx_length << std::endl;
143+
std::cout << " sb.bin_size_ = " << sb.bin_size_ << std::endl;
144+
145+
if (value.epctx_offset < sb.bin_size_ && value.epctx_length <= sb.bin_size_ &&
146+
(value.epctx_offset <= sb.bin_size_ - value.epctx_length)) {
147+
sb.bin_file_.seekg(value.epctx_offset); // Move to the specified offset
148+
std::string buffer(value.epctx_length, '\0'); // preallocate space
149+
sb.bin_file_.read(&buffer[0], value.epctx_length); // Read the specified length
150+
// Adjust string size in case of a short read
151+
buffer.resize(sb.bin_file_.gcount());
152+
std::cout << " Read epctx into stream " << std::endl;
153+
result.reset((std::istream*)new std::istringstream(buffer));
154+
}
155+
}
156+
ORT_ENFORCE(result != nullptr, " Epctx blob is not read. Check bin file correctness from Bin path: ",
157+
blob_filepath.string());
158+
} else {
159+
result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in));
160+
}
125161
}
126162
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node";
127163
return result;

onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
#include <sstream>
77
#include <string>
88
#include <memory>
9+
#include <streambuf>
910

1011
#include "core/providers/shared_library/provider_api.h"
12+
#include "core/providers/openvino/contexts.h"
1113

1214
namespace onnxruntime {
1315
namespace openvino_ep {
@@ -31,7 +33,10 @@ class EPCtxHandler {
3133
const std::string& graph_name,
3234
const bool embed_mode,
3335
std::string&& model_blob_str) const;
34-
std::unique_ptr<std::istream> GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const;
36+
std::unique_ptr<std::istream> GetModelBlobStream(SharedContext& shared_context_,
37+
const std::filesystem::path& so_context_file_path,
38+
const std::string& subgraph_name,
39+
const GraphViewer& graph_viewer) const;
3540
InlinedVector<const Node*> GetEPCtxNodes() const;
3641

3742
private:
@@ -40,5 +45,26 @@ class EPCtxHandler {
4045
const logging::Logger& logger_;
4146
};
4247

48+
// class LimitedFileStreambuf : public std::streambuf {
49+
// private:
50+
// std::fstream& file; // Reference to the existing file stream
51+
// long start, end; // Start and end positions
52+
53+
// protected:
54+
// int_type underflow() override {
55+
// if (file.tellg() >= end || file.eof())
56+
// return traits_type::eof(); // Stop reading if we reach the limit
57+
58+
// return file.get(); // Read next character directly from the file
59+
// }
60+
61+
// public:
62+
// LimitedFileStreambuf(std::fstream& bin_file_, long start, long end)
63+
// : file(bin_file_), start(start), end(end) {
64+
// file.clear(); // Clear error flags in case of previous reads
65+
// file.seekg(start); // Move file pointer to the start position
66+
// }
67+
// };
68+
4369
} // namespace openvino_ep
4470
} // namespace onnxruntime

0 commit comments

Comments
 (0)