-
Notifications
You must be signed in to change notification settings - Fork 37
Enable adaptive stripping and eliminate dependency of weight sharing feature on OVEP qdq stripping #629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enable adaptive stripping and eliminate dependency of weight sharing feature on OVEP qdq stripping #629
Changes from all commits
35a8b37
0c600c3
b7b468b
810eba0
e2f814f
37ecdd5
2969048
89284a8
a8b7fb1
5f69392
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
#include "core/providers/openvino/ibackend.h" | ||
#include "core/providers/openvino/backend_utils.h" | ||
#include "core/providers/openvino/qdq_transformations/qdq_stripping.h" | ||
#include "core/providers/openvino/ov_interface.h" | ||
|
||
namespace onnxruntime { | ||
namespace openvino_ep { | ||
|
@@ -359,22 +360,37 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node, | |
} | ||
}; | ||
|
||
[[maybe_unused]] bool enable_ovep_qdq_optimizer = session_context_.enable_qdq_optimizer && IsQDQGraph(subgraph); | ||
[[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", session_context_.device_type); | ||
#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 0)) || (OPENVINO_VERSION_MAJOR > 2025)) | ||
if (session_context_.device_type.find("NPU") != std::string::npos && session_context_.enable_qdq_optimizer) { | ||
if (enable_compiler_qdq_optimization.has_value() && enable_compiler_qdq_optimization.value()) { | ||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]: Compiler QDQ optimization pass is enabled"; | ||
OVCore::Get()->core.set_property("NPU", {ov::intel_npu::qdq_optimization(true)}); | ||
// disabling OVEP qdq stripping | ||
// at this stage provider option "enable_qdq_optimizer" is still true but OVEP stripping is (disabled) false | ||
// as compiler stripping is enabled | ||
enable_ovep_qdq_optimizer = false; | ||
} else { | ||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]: OVEP QDQ optimization pass is enabled"; | ||
} | ||
Comment on lines
+374
to
+376
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Move back to implementation block |
||
} | ||
#endif | ||
|
||
const auto& onnx_model_path_name = subgraph.ModelPath(); | ||
// QDQ stripping enabled only for the NPU | ||
if (session_context_.device_type.find("NPU") != std::string::npos && | ||
session_context_.enable_qdq_optimizer && | ||
IsQDQGraph(subgraph)) { | ||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 1"; | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. The message about QDQ being disabled is still there. Let's keep both in their respective implementation blocks. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We will address this in the next PR, which we will open for refactoring. |
||
(enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) { | ||
std::unique_ptr<onnxruntime::Model> model; | ||
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, model, shared_context_.shared_weights); | ||
Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, model, shared_context_.shared_weights, enable_ovep_qdq_optimizer); | ||
auto model_proto = model->ToProto(); | ||
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); | ||
print_model_proto_duration(); | ||
DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node); | ||
ORT_ENFORCE(status.IsOK(), status.ErrorMessage()); | ||
return model_proto; | ||
} else { | ||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 0"; | ||
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled"; | ||
auto model = subgraph.CreateModel(logger); | ||
auto model_proto = model->ToProto(); | ||
model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION); | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@saurabhkale17 .. Is this OV version check required? I think it is redundant and can be optimized out.
The line below would only enable the property if OV is 2025.1 and the driver supports QDQ stripping:
if (std::find(supported_properties.begin(), supported_properties.end(), "NPU_QDQ_OPTIMIZATION") != supported_properties.end()) {
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This OpenVINO version check is necessary to avoid compilation errors in older versions.
For older OV versions I get this error: C2039: 'qdq_optimization': is not a member of 'ov::intel_npu'
The reason for this behavior:
This check happens at runtime, because it queries the device's supported properties dynamically.
if (std::find(supported_properties.begin(), supported_properties.end(), "NPU_QDQ_OPTIMIZATION") != supported_properties.end())
However, the line
OVCore::Get()->core.set_property("NPU", {ov::intel_npu::qdq_optimization(true)});
happens at compile-time, since the compiler needs to resolve qdq_optimization(true) during compilation.
If ov::intel_npu::qdq_optimization(true) does not exist in an older OpenVINO version, the compiler treats it as an undefined symbol and throws an error before execution—long before the std::find(...) check can even run.
The #if preprocessor check ensures that qdq_optimization(true) is only compiled when the OpenVINO version supports it, preventing compilation failures in older versions.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Interesting. I was looking at the OV C/C++ API documentation to find out what the behavior is when an unrecognized property is used, but it is either not described or left undefined. I was trying to see whether, instead of doing get_supported_properties -> set_property, we could directly do something like
if (!set_property) then fallback
.