38 changes: 17 additions & 21 deletions onnxruntime/core/providers/openvino/backend_manager.cc
@@ -44,10 +44,6 @@ BackendManager::BackendManager(SessionContext& session_context,
shared_context_{shared_context} {
subgraph_context_.is_ep_ctx_graph = ep_ctx_handle_.CheckForOVEPCtxNodeInGraph(subgraph);

bool cpu_or_gpu = session_context_.device_type.find("CPU") != std::string::npos ||
session_context_.device_type.find("GPU") != std::string::npos;
bool npu = session_context_.device_type.find("NPU") != std::string::npos;

subgraph_context_.model_precision = [&](const GraphViewer& graph_viewer) {
// return empty if graph has no inputs or if types are not one of FP32/FP16
// else assume the type of the first input
@@ -109,8 +105,7 @@ BackendManager::BackendManager(SessionContext& session_context,
if (ModelHasSymbolicInputDims(subgraph)) {
subgraph_context_.has_dynamic_input_shape = true;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims";
if (cpu_or_gpu || (npu && session_context_.enable_causallm) &&
!session_context_.disable_dynamic_shapes) {
if (!session_context_.disable_dynamic_shapes) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
<< "Creating backend Dynamic Shapes";
try {
@@ -483,30 +478,34 @@ BackendManager::ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_p
void BackendManager::Compute(OrtKernelContext* context) {
Ort::KernelContext ctx(context);
std::chrono::high_resolution_clock::time_point start_compute, end_compute;
bool cpu_or_gpu = session_context_.device_type.find("CPU") != std::string::npos ||
session_context_.device_type.find("GPU") != std::string::npos;
bool npu = session_context_.device_type.find("NPU") != std::string::npos;

#ifdef OPENVINO_FIL_ENABLED
static bool fil_enabled = true;
if (fil_enabled) {
start_compute = std::chrono::high_resolution_clock::now();
LOGS_DEFAULT(INFO) << "Start Compute";
}
#endif
// OV NPU doesn't support dynamic shaped model inference.

// if disable_dynamic_shapes is set to true then execution of dynamic model is done
// by rewriting the model to static shaped model at runtime based on input shape.
// disable_dynamic_shapes is always set to true for OV NPU plugin.
if (subgraph_context_.has_dynamic_input_shape &&
!session_context_.disable_dynamic_shapes &&
(cpu_or_gpu || (npu && session_context_.enable_causallm))) {
// disable_dynamic_shapes should be set for devices that don't support dynamic shapes.
bool need_dynamic_backend = subgraph_context_.has_dynamic_input_shape &&
session_context_.disable_dynamic_shapes;

if (!need_dynamic_backend) {
concrete_backend_->Infer(context);
} else if (subgraph_context_.has_dynamic_input_shape) {
} else {
std::vector<std::vector<int64_t>> tensor_shapes = GetInputTensorShapes(ctx);
auto key = MakeMapKeyString(tensor_shapes, session_context_.device_type);
std::shared_ptr<IBackend> dynamic_backend;
auto search = backend_map_.find(key);
if (search == backend_map_.end()) {

{
std::unique_lock<std::mutex> lock(mutex_);
dynamic_backend = backend_map_[key];
}

if (!dynamic_backend) {
ptr_stream_t model_stream;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
<< "Creating dynamic backend for key: " << key;
@@ -547,14 +546,11 @@ void BackendManager::Compute(OrtKernelContext* context) {
}
#endif
}
std::unique_lock<std::mutex> lock(mutex_);
backend_map_.insert({key, dynamic_backend});
} else {
dynamic_backend = search->second;
}

dynamic_backend->Infer(context);
} else {
concrete_backend_->Infer(context);
}
#ifdef OPENVINO_FIL_ENABLED
if (fil_enabled) {
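The Compute rewrite above does two things: it collapses the device-specific dynamic-shape condition into a single need_dynamic_backend flag, and it guards backend_map_ with the newly added mutex_ so concurrent Compute calls no longer race on the cache. Below is a condensed sketch of that caching pattern, with simplified types; BackendCache, StubBackend, and the key string are placeholders for illustration, not the PR's actual classes:

```cpp
#include <iostream>
#include <map>
#include <memory>
#include <mutex>
#include <string>

struct IBackend {
  virtual void Infer() = 0;
  virtual ~IBackend() = default;
};

// Placeholder for the real shape-specialized compiled backend.
struct StubBackend : IBackend {
  void Infer() override { std::cout << "infer\n"; }
};

class BackendCache {
 public:
  std::shared_ptr<IBackend> GetOrCreate(const std::string& key) {
    std::shared_ptr<IBackend> backend;
    {
      // Short critical section: only the lookup is guarded. operator[]
      // default-constructs an empty shared_ptr slot on a miss.
      std::lock_guard<std::mutex> lock(mutex_);
      backend = map_[key];
    }
    if (!backend) {
      // Expensive model compilation happens outside the lock, so
      // requests for other keys can make progress concurrently.
      backend = std::make_shared<StubBackend>();
      std::lock_guard<std::mutex> lock(mutex_);
      // Assignment overwrites the empty slot created by the earlier
      // lookup; if two threads raced to build the same key, the last
      // writer wins and both use a valid backend for this call.
      map_[key] = backend;
    }
    return backend;
  }

 private:
  std::mutex mutex_;
  std::map<std::string, std::shared_ptr<IBackend>> map_;
};

int main() {
  BackendCache cache;
  cache.GetOrCreate("1x3x224x224")->Infer();  // miss: creates and caches
  cache.GetOrCreate("1x3x224x224")->Infer();  // hit: reuses cached backend
}
```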
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/backend_manager.h
@@ -50,6 +50,7 @@ class BackendManager {

std::unique_ptr<ONNX_NAMESPACE::ModelProto> model_proto_;
std::shared_ptr<IBackend> concrete_backend_;
std::mutex mutex_;
std::map<std::string, std::shared_ptr<IBackend>> backend_map_;
SubGraphContext subgraph_context_;
EPCtxHandler& ep_ctx_handle_;
44 changes: 2 additions & 42 deletions onnxruntime/core/providers/openvino/backend_utils.cc
@@ -175,32 +175,6 @@ CreateOVModel(std::string&& model,
}
}

Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
OVInferRequestPtr infer_request,
std::string output_name,
const SubGraphContext::string_index_map_t& output_names) {
auto graph_output_blob = infer_request->GetTensor(output_name);

auto graph_output_dims = graph_output_blob->get_shape();

if (batch_size > 1) {
// Add the batch size as dim 0.
graph_output_dims.insert(graph_output_dims.begin(), batch_size);
}
size_t num_dims = graph_output_dims.size();
std::unique_ptr<int64_t[]> output_shape(new int64_t[num_dims]);
for (size_t j = 0; j < num_dims; j++) {
output_shape[j] = static_cast<int64_t>(graph_output_dims[j]);
}
auto it = output_names.find(output_name);
if (it == output_names.end()) {
ORT_THROW(log_tag + "Output names mismatch between OpenVINO and ONNX");
}
int index = it->second;
return context.GetOutput(index, output_shape.get(), num_dims);
}

Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context,
std::string output_name,
@@ -216,14 +190,9 @@ GetOutputTensor(Ort::KernelContext& context,
ORT_THROW(log_tag + "Output names mismatch between OpenVINO and ONNX");
}
int index = it->second;
auto shape = node->get_shape();
auto output_shape = ParameterShape::ToOrtShape(node->get_shape());

size_t num_dims = shape.size();
std::unique_ptr<int64_t[]> output_shape(new int64_t[num_dims]);
for (size_t j = 0; j < num_dims; j++) {
output_shape[j] = static_cast<int64_t>(shape[j]);
}
return context.GetOutput(index, output_shape.get(), num_dims);
return context.GetOutput(index, output_shape);
}

int GetFirstAvailableDevice(SessionContext& session_context) {
@@ -308,15 +277,6 @@ void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
std::memcpy(input_data, batch_memory_offset, input_data_size);
}

void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
size_t batch_slice_idx) {
auto output_data = outputBlob->data();
size_t output_data_size = outputBlob->get_byte_size();
char* tensor_data = output_tensor.GetTensorMutableData<char>();
char* batch_memory_offset = tensor_data + output_data_size * batch_slice_idx;
std::memcpy(batch_memory_offset, output_data, output_data_size);
}

void printPerformanceCounts(const std::vector<OVProfilingInfo>& performanceMap,
std::ostream& stream, std::string deviceName) {
int64_t totalTime = 0;
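In the surviving GetOutputTensor overload, the hand-rolled size_t-to-int64_t loop is replaced by ParameterShape::ToOrtShape plus the Ort::KernelContext::GetOutput overload that accepts a dimension vector. Judging by the call site, the helper performs roughly the following conversion; this stand-alone version is an assumption for illustration, not the PR's actual implementation:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// ov::Shape stores dimensions as size_t, while the ORT API expects
// int64_t. Hypothetical equivalent of ParameterShape::ToOrtShape.
std::vector<int64_t> ToOrtShape(const std::vector<std::size_t>& ov_shape) {
  std::vector<int64_t> ort_shape;
  ort_shape.reserve(ov_shape.size());
  for (std::size_t dim : ov_shape) {
    ort_shape.push_back(static_cast<int64_t>(dim));
  }
  return ort_shape;
}
```

Returning a vector lets the call site pass the shape straight to context.GetOutput(index, output_shape) instead of managing a raw int64_t buffer and an explicit dimension count.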
12 changes: 2 additions & 10 deletions onnxruntime/core/providers/openvino/backend_utils.h
@@ -27,8 +27,9 @@

namespace onnxruntime {
namespace openvino_ep {
static constexpr std::string log_tag = "[OpenVINO-EP] ";
@ankitm3k commented on Jun 12, 2025:

Build error here as well, due to the compiler (build-error screenshot omitted).

Suggestion - static const std::string log_tag = "[OpenVINO-EP] ";

The PR author replied:

See above: constexpr std::string requires a C++20 standard library implementation.
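For context on this exchange: a namespace-scope constexpr std::string compiles only when the standard library implements C++20 constexpr allocation, and for a persistent variable the value must also fit the small-string buffer so no allocation outlives constant evaluation; older toolchains reject the declaration outright. A minimal sketch of the portable alternatives (log_tag_view is a hypothetical name, not part of the PR):

```cpp
#include <string>
#include <string_view>

// Requires a C++20 standard library with constexpr std::string support
// (roughly GCC 12 / recent MSVC onward), and the literal must fit the
// small-string buffer:
// static constexpr std::string log_tag = "[OpenVINO-EP] ";

// Portable fallback suggested in the review: runtime-initialized, with
// one copy per translation unit when defined in a header.
static const std::string log_tag = "[OpenVINO-EP] ";

// Allocation-free alternative that is constexpr even in C++17, usable
// wherever callers only need a view rather than a std::string:
static constexpr std::string_view log_tag_view = "[OpenVINO-EP] ";
```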


namespace backend_utils {
const std::string log_tag = "[OpenVINO-EP] ";

bool IsDebugEnabled();

@@ -48,19 +49,10 @@ GetOutputTensor(Ort::KernelContext& context,
const SubGraphContext::string_index_map_t& output_names,
std::shared_ptr<ov::Node> node);

Ort::UnownedValue
GetOutputTensor(Ort::KernelContext& context, size_t batch_size,
OVInferRequestPtr infer_request,
std::string output_name,
const SubGraphContext::string_index_map_t& output_names);

void FillInputBlob(OVTensorPtr inputBlob, size_t batch_slice_idx,
std::string input_name, Ort::KernelContext& context,
const SubGraphContext& subgraph_context);

void FillOutputBlob(OVTensorPtr outputBlob, Ort::UnownedValue& output_tensor,
size_t batch_slice_idx);

std::shared_ptr<const OVNetwork>
CreateOVModel(std::string&& model,
const SessionContext& session_context,