Skip to content

Commit d304d7e

Browse files
committed
Fix the reading of epctx blob using stream
1 parent 6e52fae commit d304d7e

File tree

10 files changed

+389
-338
lines changed

10 files changed

+389
-338
lines changed

onnxruntime/core/providers/openvino/backend_manager.cc

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -77,7 +77,13 @@ BackendManager::BackendManager(SessionContext& session_context,
7777
ptr_stream_t model_stream;
7878
std::unique_ptr<onnx::ModelProto> model_proto;
7979
if (subgraph_context_.is_ep_ctx_graph) {
80-
model_stream = ep_ctx_handle_.GetModelBlobStream(session_context_.so_context_file_path, subgraph);
80+
std::cout << " inside is_ep_ctx_graph " << std::endl;
81+
std::string model_name = onnxruntime::openvino_ep::BackendManager::stripAfterFirstDot(session_context_.onnx_model_path_name.filename().string());
82+
auto subgraph_name = model_name + "_" + subgraph_context_.subgraph_name;
83+
model_stream = ep_ctx_handle_.GetModelBlobStream(shared_context_,
84+
session_context_.so_context_file_path,
85+
subgraph_name,
86+
subgraph);
8187
} else {
8288
model_proto = GetModelProtoFromFusedNode(fused_node, subgraph, logger);
8389
}
@@ -97,7 +103,9 @@ BackendManager::BackendManager(SessionContext& session_context,
97103
if (!sw.mapped_weights) {
98104
sw.mapped_weights = std::make_unique<SharedContext::SharedWeights::WeightsFile>(weight_filename);
99105
}
106+
std::cout << " Call createOVTensors in backend_manager.cc" << std::endl;
100107
backend_utils::CreateOVTensors(session_context_.device_type, sw.metadata, *sw.mapped_weights);
108+
std::cout << " create OVTensors successful " << std::endl;
101109
}
102110
}
103111

@@ -198,6 +206,14 @@ BackendManager::BackendManager(SessionContext& session_context,
198206
}
199207
}
200208

209+
// Returns the part of `filename` that precedes its first '.' character.
// A name that contains no '.' is returned unchanged.
std::string BackendManager::stripAfterFirstDot(std::string filename) {
  // find() yields npos when there is no dot; substr() clamps its count to the
  // string length, so npos selects the whole name.
  const auto first_dot = filename.find('.');
  return filename.substr(0, first_dot);
}
216+
201217
// Call EPContext model exporter here if the provider option for exporting
202218
// precompiled blob is set. If that's the case:
203219
// By default, create model in embed mode where the blob stream is exported as data within
@@ -211,27 +227,33 @@ Status BackendManager::ExportCompiledBlobAsEPCtxNode(const onnxruntime::GraphVie
211227
ORT_THROW(exception_str);
212228
}
213229

230+
std::cout << " inside export compiled model " << std::endl;
231+
214232
// If embed_mode, then pass on the serialized blob
215233
// If not embed_mode, dump the blob here and only pass on the path to the blob
216234
std::string model_blob_str;
217235
auto compiled_model = concrete_backend_->GetOVCompiledModel();
218-
if (session_context_.so_share_ep_contexts){
219-
// std::ostringstream model_blob_stream;
220-
// compiled_model.export_model(model_blob_stream);
236+
if (session_context_.so_share_ep_contexts) {
237+
std::ostringstream model_blob_stream;
238+
compiled_model.export_model(model_blob_stream);
239+
std::cout << " inside export compiled model - share ep contexts" << std::endl;
221240

222241
// std::ofstream file(metadata_filename, std::ios::app| std::ios::binary);
223242
// std::cout << " write to metadata bin - " << metadata_filename << std::endl;
224243
auto& subgraph_metadata = shared_context_.shared_weights.subgraph_metadata;
225-
226-
sw::SubgraphMetadata::Map::key_type key{subgraph_context_.subgraph_name};
244+
std::string model_name = onnxruntime::openvino_ep::BackendManager::stripAfterFirstDot(session_context_.onnx_model_path_name.filename().string());
245+
auto subgraph_name = model_name + "_" + subgraph_context_.subgraph_name;
246+
sw::SubgraphMetadata::Map::key_type key{subgraph_name};
227247
sw::SubgraphMetadata::Map::mapped_type value{};
228248

229249
auto& bin_file = shared_context_.shared_weights.shared_bin_file.bin_file_;
230-
if (bin_file.is_open()) {
250+
std::cout << " subgraph name " << subgraph_name << "key = " << key.name << " For bin write " << std::endl;
251+
if (!subgraph_metadata.contains(key) && bin_file.is_open()) {
231252
// std::cout << "Current offset before "<< subgraph_context_.subgraph_name << " = " << bin_file.tellp() << std::endl;
232253
value.epctx_offset = bin_file.tellp();
233-
// bin_file << model_blob_stream.str();
234-
compiled_model.export_model(bin_file);
254+
std::cout << " bin file location for writing subgraph = " << bin_file.tellp() << std::endl;
255+
bin_file << model_blob_stream.str();
256+
// compiled_model.export_model(bin_file);
235257
// std::cout << "Current offset after "<< subgraph_context_.subgraph_name << " = " << bin_file.tellp() << std::endl;
236258
value.epctx_length = static_cast<size_t>(static_cast<std::streamoff>(bin_file.tellp()) - value.epctx_offset);
237259
// std::cout << "Key = " << key.name << " Offset = " << value.epctx_offset << " , length = " << value.epctx_length << std::endl;

onnxruntime/core/providers/openvino/backend_manager.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,8 @@ class BackendManager {
4747
ReWriteInputShapeInfo(const ONNX_NAMESPACE::ModelProto& model_proto,
4848
const std::vector<std::vector<int64_t>>& input_shapes);
4949

50+
std::string stripAfterFirstDot(std::string filename);
51+
5052
std::unique_ptr<ONNX_NAMESPACE::ModelProto> model_proto_;
5153
std::shared_ptr<IBackend> concrete_backend_;
5254
std::map<std::string, std::shared_ptr<IBackend>> backend_map_;

onnxruntime/core/providers/openvino/backend_utils.cc

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -303,22 +303,33 @@ void CreateOVTensors(const std::string& device_name,
303303
SharedContext::SharedWeights::Metadata::Map& metadata_map,
304304
SharedContext::SharedWeights::WeightsFile& weights) {
305305
for (auto& [key, value] : metadata_map) {
306-
if (value.tensor) continue;
306+
// std::cout << " Key = " << key.name << std::endl;
307+
if (value.tensor) {
308+
// std::cout << " Value already present for key = " << key.name << std::endl;
309+
continue;
310+
}
307311

308312
// Get element data type
313+
// std::cout << " value element type = " << value.element_type << std::endl;
309314
auto onnx_element_type = (ONNX_NAMESPACE::TensorProto_DataType)value.element_type;
310315

311316
ov::element::Type ov_elementType = GetOpenVINOElementType(onnx_element_type); // Map to OpenVINO data type
312-
317+
// std::cout << "value dimensions = " << std::endl;
318+
// for (auto dim:value.dimensions){
319+
// std::cout << dim << std::endl;
320+
// }
313321
// Create OpenVINO Tensor
314322
if (device_name == "NPU") {
315323
// Use remote tensors
316324
auto npu_context = OVCore::Get()->core.get_default_context("NPU").as<ov::intel_npu::level_zero::ZeroContext>();
317325
auto&& remote_tensor = npu_context.create_l0_host_tensor(ov_elementType, value.dimensions, ov::intel_npu::TensorType::INPUT);
318-
326+
// std::cout << " Remote tensor created " << std::endl;
319327
// Copy data to remote tensor
328+
// std::cout << " value size = " << value.size << std::endl;
320329
weights.load_weights(value.data_offset, remote_tensor.get(), value.size);
321330
value.tensor = std::make_shared<ov::Tensor>(remote_tensor);
331+
// std::cout << " value tensor created " << std::endl;
332+
322333
} else {
323334
// Use vanilla tensors
324335
value.tensor = std::make_shared<ov::Tensor>(ov_elementType, value.dimensions);

onnxruntime/core/providers/openvino/backends/basic_backend.cc

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -116,6 +116,7 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
116116
ov_model, hw_target, device_config, subgraph_context_.subgraph_name);
117117
}
118118
#endif
119+
std::cout << " loaded model to the plugin " << std::endl;
119120
LOGS_DEFAULT(INFO) << log_tag << "Loaded model to the plugin";
120121
} catch (const char* msg) {
121122
ORT_THROW(msg);
@@ -127,11 +128,14 @@ BasicBackend::BasicBackend(std::unique_ptr<ONNX_NAMESPACE::ModelProto>& model_pr
127128
if (session_context_.so_share_ep_contexts) {
128129
initializer = [&metadata](OVInferRequestPtr ir_ptr) {
129130
const auto input_count = ir_ptr->GetNumInputs();
131+
std::cout << " ov ir input count = " << input_count << std::endl;
130132
for (auto i = 0u; i < input_count; i++) {
131133
using Key = SharedContext::SharedWeights::Metadata::Key;
132134
const auto tensor_key = Key{ir_ptr->GetInputTensorName(i)};
133135
if (metadata.contains(tensor_key)) {
134136
auto& value = metadata.at(tensor_key);
137+
// ORT_ENFORCE(value.tensor->get_byte_size() == value.size, "Unexpected tensor size mismatch");
138+
std::cout << " value tensor is set with shape = " << value.tensor->get_byte_size() << " input size from metadata = " << value.size << std::endl;
135139
ir_ptr->SetTensor(tensor_key.name, value.tensor);
136140
}
137141
}

onnxruntime/core/providers/openvino/contexts.h

Lines changed: 18 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -26,15 +26,15 @@ class SharedContext : public WeakSingleton<SharedContext> {
2626
SharedContext() : OVCore_(OVCore::Get()) {}
2727
struct SharedWeights {
2828
struct Header {
29-
uint32_t bin_version=1;
30-
long footer_offset=0;
31-
}header_;
29+
uint32_t bin_version = 1;
30+
long footer_offset = 0;
31+
} header_;
3232
struct Footer {
3333
long subgraph_offset;
3434
size_t subgraph_length;
3535
long metadata_offset;
3636
size_t metadata_length;
37-
}footer_;
37+
} footer_;
3838

3939
struct Metadata {
4040
struct Key {
@@ -57,8 +57,6 @@ class SharedContext : public WeakSingleton<SharedContext> {
5757
using Map = std::unordered_map<Key, Value, Hash>;
5858
void writeMetadataToBinaryFile(SharedContext& shared_context, const Metadata::Map& metadata);
5959
void readMetadataFromBinaryFile(SharedContext& shared_context, Metadata::Map& metadata);
60-
// friend std::ostream& operator<<(std::ostream& right, const Metadata::Map& metadata);
61-
// friend std::istream& operator>>(std::istream& right, Metadata::Map& metadata);
6260
};
6361

6462
struct SubgraphMetadata {
@@ -79,9 +77,7 @@ class SharedContext : public WeakSingleton<SharedContext> {
7977
void writeSubgraphDataToBinaryFile(SharedContext& shared_context,
8078
const SubgraphMetadata::Map& subgraph_metadata);
8179
void readSubgraphDataFromBinaryFile(SharedContext& shared_context,
82-
SubgraphMetadata::Map& subgraph_metadata);
83-
// friend std::ostream& operator<<(std::ostream& right, const SubgraphMetadata::Map& subgraph_metadata);
84-
// friend std::istream& operator>>(std::istream& right, SubgraphMetadata::Map& subgraph_metadata);
80+
SubgraphMetadata::Map& subgraph_metadata);
8581
};
8682

8783
struct WeightsFile {
@@ -104,56 +100,49 @@ class SharedContext : public WeakSingleton<SharedContext> {
104100
SharedBinFile() = default; // Default constructor
105101
~SharedBinFile() {
106102
if (bin_file_.is_open()) {
107-
bin_file_.close(); // Close file when object is destroyed
103+
bin_file_.close(); // Close file when object is destroyed
108104
}
109105
}
110106

111107
// Opens the shared bin file for binary read/write access and records its
// current size in bin_size_.
//
// Steps:
//   1. If the path does not exist, an empty binary file is created there.
//   2. The owner read and owner write permission bits are checked.
//   3. If bin_file_ is not already open, it is opened in in|out|binary mode,
//      its size is measured, and the read position is rewound to the start.
//      An already-open stream is left untouched (no reopen, no re-measure).
//
// @param shared_bin_filename Path of the shared bin file.
// @throws std::runtime_error if the file cannot be created, lacks owner
//         read/write permission, cannot be opened, or cannot be measured.
void openBinFile(const fs::path shared_bin_filename) {
  // Create an empty binary file when none exists yet, so the in|out open
  // below (which never creates files) can succeed.
  if (!fs::exists(shared_bin_filename)) {
    std::ofstream createFile(shared_bin_filename, std::ios::binary);
    if (!createFile) {
      throw std::runtime_error("Failed to create the file!");
    }
    // createFile is closed by its destructor at the end of this scope.
  }

  // Check if the file is accessible for reading and writing
  const fs::perms file_perms = fs::status(shared_bin_filename).permissions();
  const bool owner_can_read = (file_perms & fs::perms::owner_read) != fs::perms::none;
  const bool owner_can_write = (file_perms & fs::perms::owner_write) != fs::perms::none;
  if (!owner_can_read || !owner_can_write) {
    std::cerr << "Error: Insufficient permissions for file: " << shared_bin_filename << std::endl;
    throw std::runtime_error("Failed to open shared bin file! Insufficient permissions.");
  }

  if (bin_file_.is_open()) {
    return;  // Prevent reopening
  }

  bin_file_.open(shared_bin_filename, std::ios::in | std::ios::out | std::ios::binary);
  // Validate the open BEFORE touching the stream: seeking/telling on a stream
  // that failed to open would store a failure sentinel in bin_size_.
  if (!bin_file_.is_open()) {
    throw std::runtime_error("Failed to open shared bin file!");
  }

  // Measure the file by seeking to its end, then rewind for subsequent reads.
  bin_size_ = bin_file_.seekg(0, std::ios::end).tellg();
  bin_file_.seekg(0, std::ios::beg);  // Reset to the beginning of the file

  if (!bin_file_) {
    throw std::runtime_error("Failed to determine the size of the shared bin file!");
  }
}
148-
}shared_bin_file;
136+
void readBinFile(SharedContext& shared_context_);
137+
} shared_bin_file;
149138

150139
fs::path external_weight_filename;
151140
std::unique_ptr<WeightsFile> mapped_weights;
152141
Metadata metadata_;
153142
Metadata::Map metadata;
154143
SubgraphMetadata subgraph_metadata_;
155144
SubgraphMetadata::Map subgraph_metadata;
156-
}shared_weights;
145+
} shared_weights;
157146
};
158147

159148
using config_t = std::map<std::string, ov::AnyMap>;

onnxruntime/core/providers/openvino/onnx_ctx_model_helper.cc

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -99,7 +99,10 @@ Status EPCtxHandler::AddOVEPCtxNodeToGraph(const GraphViewer& graph_viewer,
9999
return Status::OK();
100100
}
101101

102-
std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const {
102+
std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(SharedContext& shared_context_,
103+
const std::filesystem::path& so_context_file_path,
104+
const std::string& subgraph_name,
105+
const GraphViewer& graph_viewer) const {
103106
auto first_index = *graph_viewer.GetNodesInTopologicalOrder().begin();
104107
auto node = graph_viewer.GetNode(first_index);
105108
ORT_ENFORCE(node != nullptr);
@@ -121,7 +124,40 @@ std::unique_ptr<std::istream> EPCtxHandler::GetModelBlobStream(const std::filesy
121124
}
122125
blob_filepath = blob_filepath.parent_path() / ep_cache_context;
123126
ORT_ENFORCE(std::filesystem::exists(blob_filepath), "Blob file not found: ", blob_filepath.string());
124-
result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in));
127+
std::cout << " blob_filepath " << blob_filepath.filename().string() << std::endl;
128+
std::cout << " shared bin filename = " << shared_context_.shared_weights.shared_bin_file.shared_bin_filename.filename().string() << std::endl;
129+
if (blob_filepath == shared_context_.shared_weights.shared_bin_file.shared_bin_filename) {
130+
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from Shared bin file - " << blob_filepath;
131+
auto& sb = shared_context_.shared_weights.shared_bin_file;
132+
// check if size of bin file is greater than the header as it gets written at the beginning
133+
ORT_ENFORCE(sb.bin_size_ > 8, " Bin file is empty. Regenerate the epctx model. Bin file path : ", blob_filepath.string());
134+
auto subgraph_metadata = shared_context_.shared_weights.subgraph_metadata;
135+
using Key = SharedContext::SharedWeights::SubgraphMetadata::Key;
136+
std::cout << " subgraph name = " << subgraph_name << std::endl;
137+
const auto subgraph_key = Key{subgraph_name};
138+
auto it = subgraph_metadata.find(subgraph_key);
139+
if (it != subgraph_metadata.end()) {
140+
auto& value = it->second;
141+
std::cout << " value.epctx_offset = " << value.epctx_offset << std::endl;
142+
std::cout << " value.epctx_length = " << value.epctx_length << std::endl;
143+
std::cout << " sb.bin_size_ = " << sb.bin_size_ << std::endl;
144+
145+
if (value.epctx_offset < sb.bin_size_ && value.epctx_length <= sb.bin_size_ &&
146+
(value.epctx_offset <= sb.bin_size_ - value.epctx_length)) {
147+
sb.bin_file_.seekg(value.epctx_offset); // Move to the specified offset
148+
std::string buffer(value.epctx_length, '\0'); // preallocate space
149+
sb.bin_file_.read(&buffer[0], value.epctx_length); // Read the specified length
150+
// Adjust string size in case of a short read
151+
buffer.resize(sb.bin_file_.gcount());
152+
std::cout << " Read epctx into stream " << std::endl;
153+
result.reset((std::istream*)new std::istringstream(buffer));
154+
}
155+
}
156+
ORT_ENFORCE(result != nullptr, " Epctx blob is not read. Check bin file correctness from Bin path: ",
157+
blob_filepath.string());
158+
} else {
159+
result.reset((std::istream*)new std::ifstream(blob_filepath, std::ios_base::binary | std::ios_base::in));
160+
}
125161
}
126162
LOGS_DEFAULT(VERBOSE) << "[OpenVINO EP] Read blob from EPContext Node";
127163
return result;

onnxruntime/core/providers/openvino/onnx_ctx_model_helper.h

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,10 @@
66
#include <sstream>
77
#include <string>
88
#include <memory>
9+
#include <streambuf>
910

1011
#include "core/providers/shared_library/provider_api.h"
12+
#include "core/providers/openvino/contexts.h"
1113

1214
namespace onnxruntime {
1315
namespace openvino_ep {
@@ -31,7 +33,10 @@ class EPCtxHandler {
3133
const std::string& graph_name,
3234
const bool embed_mode,
3335
std::string&& model_blob_str) const;
34-
std::unique_ptr<std::istream> GetModelBlobStream(const std::filesystem::path& so_context_file_path, const GraphViewer& graph_viewer) const;
36+
std::unique_ptr<std::istream> GetModelBlobStream(SharedContext& shared_context_,
37+
const std::filesystem::path& so_context_file_path,
38+
const std::string& subgraph_name,
39+
const GraphViewer& graph_viewer) const;
3540
InlinedVector<const Node*> GetEPCtxNodes() const;
3641

3742
private:
@@ -40,5 +45,26 @@ class EPCtxHandler {
4045
const logging::Logger& logger_;
4146
};
4247

48+
// class LimitedFileStreambuf : public std::streambuf {
49+
// private:
50+
// std::fstream& file; // Reference to the existing file stream
51+
// long start, end; // Start and end positions
52+
53+
// protected:
54+
// int_type underflow() override {
55+
// if (file.tellg() >= end || file.eof())
56+
// return traits_type::eof(); // Stop reading if we reach the limit
57+
58+
// return file.get(); // Read next character directly from the file
59+
// }
60+
61+
// public:
62+
// LimitedFileStreambuf(std::fstream& bin_file_, long start, long end)
63+
// : file(bin_file_), start(start), end(end) {
64+
// file.clear(); // Clear error flags in case of previous reads
65+
// file.seekg(start); // Move file pointer to the start position
66+
// }
67+
// };
68+
4369
} // namespace openvino_ep
4470
} // namespace onnxruntime

0 commit comments

Comments
 (0)