intel · MayureshV1 · Apr 7, 2025 · Mar 19, 2025 · Mar 25, 2025 · Mar 27, 2025
diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc
@@ -19,6 +19,7 @@
 #include "core/providers/openvino/ibackend.h"
 #include "core/providers/openvino/backend_utils.h"
 #include "core/providers/openvino/qdq_transformations/qdq_stripping.h"
+#include "core/providers/openvino/ov_interface.h"
 
 namespace onnxruntime {
 namespace openvino_ep {
@@ -359,22 +360,37 @@ BackendManager::GetModelProtoFromFusedNode(const onnxruntime::Node& fused_node,
     }
   };
 
+  [[maybe_unused]] bool enable_ovep_qdq_optimizer = session_context_.enable_qdq_optimizer && IsQDQGraph(subgraph);
+  [[maybe_unused]] std::optional<bool> enable_compiler_qdq_optimization = queryOVProperty("NPU_QDQ_OPTIMIZATION", session_context_.device_type);
+#if (((OPENVINO_VERSION_MAJOR == 2025) && (OPENVINO_VERSION_MINOR > 0)) || (OPENVINO_VERSION_MAJOR > 2025))
+  if (session_context_.device_type.find("NPU") != std::string::npos && session_context_.enable_qdq_optimizer) {
+    if (enable_compiler_qdq_optimization.has_value() && enable_compiler_qdq_optimization.value()) {
+      LOGS_DEFAULT(INFO) << "[OpenVINO-EP]: Compiler QDQ optimization pass is enabled";
+      OVCore::Get()->core.set_property("NPU", {ov::intel_npu::qdq_optimization(true)});
+      // Disable OVEP QDQ stripping because the compiler's QDQ optimization is enabled instead.
+      // The provider option "enable_qdq_optimizer" is still true at this stage; only the local
+      // enable_ovep_qdq_optimizer flag is set to false.
+      enable_ovep_qdq_optimizer = false;
+    } else {
+      LOGS_DEFAULT(INFO) << "[OpenVINO-EP]: OVEP QDQ optimization pass is enabled";
+    }
+  }
+#endif
+
   const auto& onnx_model_path_name = subgraph.ModelPath();
   // QDQ stripping enabled only for the NPU
   if (session_context_.device_type.find("NPU") != std::string::npos &&
-      session_context_.enable_qdq_optimizer &&
-      IsQDQGraph(subgraph)) {
-    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 1";
+      (enable_ovep_qdq_optimizer || session_context_.so_share_ep_contexts)) {
     std::unique_ptr<onnxruntime::Model> model;
-    Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, model, shared_context_.shared_weights);
+    Status status = CreateModelWithStrippedQDQNodes(subgraph, logger, session_context_.so_share_ep_contexts, model, shared_context_.shared_weights, enable_ovep_qdq_optimizer);
     auto model_proto = model->ToProto();
     model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);
     print_model_proto_duration();
     DumpOpenVINOEPModel(onnx_model_path_name, model_proto.get(), fused_node);
     ORT_ENFORCE(status.IsOK(), status.ErrorMessage());
     return model_proto;
   } else {
-    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] QDQ optimization pass status: 0";
+    LOGS_DEFAULT(INFO) << "[OpenVINO-EP] OVEP QDQ optimization pass is disabled";
     auto model = subgraph.CreateModel(logger);
     auto model_proto = model->ToProto();
     model_proto->set_ir_version(ONNX_NAMESPACE::Version::IR_VERSION);

diff --git a/onnxruntime/core/providers/openvino/ov_interface.cc b/onnxruntime/core/providers/openvino/ov_interface.cc
@@ -46,6 +46,17 @@ void printDebugInfo(const ov::CompiledModel& obj) {
 }
 #endif
 
+// Returns whether `device_type` lists `property` as supported; std::nullopt if the query throws.
+std::optional<bool> queryOVProperty(const std::string& property, const std::string& device_type) {
+  try {
+    const auto device_properties = OVCore::Get()->core.get_property(device_type, ov::supported_properties);
+    const auto match = std::find(device_properties.begin(), device_properties.end(), property);
+    return match != device_properties.end();
+  } catch (const std::exception&) {
+    return std::nullopt;  // an exception was thrown while querying the device
+  }
+}
+
 std::shared_ptr<OVNetwork> OVCore::ReadModel(std::string&& model, const std::string& model_path) {
   try {
     std::istringstream modelStringStream(std::move(model));

diff --git a/onnxruntime/core/providers/openvino/ov_interface.h b/onnxruntime/core/providers/openvino/ov_interface.h
@@ -8,6 +8,7 @@
 #include <fstream>
 #include <sstream>
 #include <utility>
+#include <optional>
 
 #include "openvino/openvino.hpp"
 #include "openvino/runtime/intel_npu/properties.hpp"
@@ -37,6 +38,8 @@ typedef ov::intel_gpu::ocl::ClContext* OVRemoteContextPtr;
 typedef ov::RemoteContext OVRemoteContext;
 #endif
 
+std::optional<bool> queryOVProperty(const std::string& property, const std::string& device_type);
+
 template <typename T>
 class WeakSingleton {
  public:

diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.cc
@@ -341,6 +341,7 @@ static bool CheckDQRuleSet(const NodeUnit& node_unit,
   }
 }
 
+// Checks whether a QLinear node feeds into an output of the src graph that expects quantized output.
 static bool CheckQFeedsIntoQuantizedOutput(const NodeUnit& node_unit,
                                            const std::unordered_map<std::string, std::string> graph_op_data_type) {
   auto op_of_quantized_layer = node_unit.Outputs();
@@ -447,9 +448,17 @@ static bool HandleDoubleQDQ(onnxruntime::Graph& dst_graph, const onnxruntime::Gr
 static void AddStandaloneNodeUnit(onnxruntime::Graph& dst_graph, const onnxruntime::GraphViewer& src_graph,
                                   const NodeUnit& node_unit,
                                   std::set<std::string>& initializers_to_keep,
-                                  const logging::Logger& /* logger */) {
+                                  const logging::Logger& /* logger */,
+                                  bool IsWeightSharingWithoutOVEPQDQStripping) {
   assert(node_unit.UnitType() == NodeUnit::Type::SingleNode);
 
+  // Scenario: weight sharing (WAI) is enabled and OVEP QDQ stripping is disabled.
+  // Do not strip any Q or DQ node; copy the node over unchanged.
+  if (IsWeightSharingWithoutOVEPQDQStripping) {
+    AddNode(initializers_to_keep, src_graph, dst_graph, node_unit.GetNode());
+    return;
+  }
+
   if (HandleDoubleQDQ(dst_graph, src_graph, node_unit, initializers_to_keep)) return;
 
   auto add_identity_op = [&](bool duplicate_dq) {
@@ -511,7 +520,8 @@ static void AddQDQNodeUnit(onnxruntime::Graph& dst_graph,
                            const onnxruntime::GraphViewer& src_graph,
                            const NodeUnit& node_unit,
                            std::set<std::string>& initializers_to_keep,
-                           const logging::Logger& /* logger */) {
+                           const logging::Logger& /* logger */,
+                           bool IsWeightSharingWithoutOVEPQDQStripping) {
   assert(node_unit.UnitType() == NodeUnit::Type::QDQGroup);
 
   // Collect inputs coming into the node unit.
@@ -529,7 +539,7 @@ static void AddQDQNodeUnit(onnxruntime::Graph& dst_graph,
     SkipReason reason = SkipReason::Other;
     bool keep_dq = CheckDQRuleSet(node_unit, dq_node, src_graph, reason);
 
-    if (keep_dq) {
+    if (IsWeightSharingWithoutOVEPQDQStripping || keep_dq) {
       AddNode(initializers_to_keep, src_graph, dst_graph, *dq_node);
       dq_node_args_to_keep.insert({input_defs.at(0)->Name(),
                                    &dst_graph.GetOrCreateNodeArg(dq_node->OutputDefs().at(0)->Name(),
@@ -597,7 +607,7 @@ static void AddQDQNodeUnit(onnxruntime::Graph& dst_graph,
 
       bool keep_q = CheckQRuleSet(node_unit, q_node, src_graph, reason);
 
-      if (keep_q) {
+      if (IsWeightSharingWithoutOVEPQDQStripping || keep_q) {
         AddNode(initializers_to_keep, src_graph, dst_graph, *q_node);
         // if keep_q, then output defs of the target node doesn't change
         output_args.push_back(&dst_graph.GetOrCreateNodeArg(target_node.OutputDefs().at(i)->Name(),
@@ -675,7 +685,8 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
                                        const logging::Logger& logger,
                                        bool enable_ovep_weight_sharing,
                                        /*out*/ std::unique_ptr<onnxruntime::Model>& model,
-                                       /*out*/ sw& shared_weights) {
+                                       /*out*/ sw& shared_weights,
+                                       bool enable_ovep_qdq_optimizer) {
   // NOTE: This function is a re-implementation of GraphViewerToProto() in core/graph/graph_proto_serializer.cc
   // with the following differences:
   //   - Uses onnxruntime::Graph APIs instead of onnx::GraphProto APIs.
@@ -766,10 +777,12 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
       continue;  // Already handled this node unit
     }
 
+    bool IsWeightSharingWithoutOVEPQDQStripping = enable_ovep_weight_sharing && !enable_ovep_qdq_optimizer;
+
     if (node_unit->UnitType() == NodeUnit::Type::SingleNode) {
-      AddStandaloneNodeUnit(dst_graph, src_graph, *node_unit, initializers_to_keep, logger);
+      AddStandaloneNodeUnit(dst_graph, src_graph, *node_unit, initializers_to_keep, logger, IsWeightSharingWithoutOVEPQDQStripping);
     } else {
-      AddQDQNodeUnit(dst_graph, src_graph, *node_unit, initializers_to_keep, logger);
+      AddQDQNodeUnit(dst_graph, src_graph, *node_unit, initializers_to_keep, logger, IsWeightSharingWithoutOVEPQDQStripping);
     }
 
     seen_node_units.insert(node_unit);

diff --git a/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.h b/onnxruntime/core/providers/openvino/qdq_transformations/qdq_stripping.h
@@ -17,7 +17,8 @@ Status CreateModelWithStrippedQDQNodes(const GraphViewer& src_graph,
                                        const logging::Logger& logger,
                                        bool enable_ovep_weight_sharing,
                                        /*out*/ std::unique_ptr<onnxruntime::Model>& model,
-                                       /*out*/ sw& shared_weights);
+                                       /*out*/ sw& shared_weights,
+                                       bool enable_ovep_qdq_optimizer);
 
 bool dumpMetaDataMapToBinary(const sw::Metadata::Map& shared_weights, const std::string& filename);
 }  // namespace openvino_ep