Skip to content

Commit

Permalink
Mohsin/enable dynamic shapes (#11867)
Browse files Browse the repository at this point in the history
* Add pypi build changes to latest Master

* Add ORT training part of OV build

* Disabling SqueezeOpTest.BadAxes

* Add ONNXruntime branch ARG to Docker build

* Changes to include file details versions

* Commit File Version Updates

* Change naming for linux build

* Add fix for pylint format errors

* Fix pylint warnings.

* Enable Dynamic Shapes for OV_API_20

* Update requirements.txt whl version- internal_ci fix

* Update backend_manager.cc MYRIAD Fix

* Update wheel version in requirements.txt

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

* Update setup.py

* Fix pylint warnings

* Fix pylint warnings 2

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

Co-authored-by: Preetha Veeramalai <preetha.veeramalai@intel.com>
Co-authored-by: mayavijx <mayax.vijayan@intel.com>
Co-authored-by: Sahar Fatima <sfatima.3001@gmail.com>
Co-authored-by: mohsinmx <mohsinx.mohammad@intel.com>
  • Loading branch information
5 people authored Jun 21, 2022
1 parent b20daed commit 61a74f2
Show file tree
Hide file tree
Showing 9 changed files with 138 additions and 31 deletions.
5 changes: 4 additions & 1 deletion include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,9 @@ typedef struct OrtMIGraphXProviderOptions {
*/
typedef struct OrtOpenVINOProviderOptions {
#ifdef __cplusplus
OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{}, use_compiled_network{}, blob_dump_path{}, context{}, enable_opencl_throttling{} {}
OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{},
num_of_threads{}, use_compiled_network{}, blob_dump_path{},
context{}, enable_opencl_throttling{}, enable_dynamic_shapes{} {}
#endif
/** \brief Device type string
*
Expand All @@ -529,6 +531,7 @@ typedef struct OrtOpenVINOProviderOptions {
const char* blob_dump_path; // path is set to empty by default
void* context;
unsigned char enable_opencl_throttling; ///< 0 = disabled, nonzero = enabled
unsigned char enable_dynamic_shapes; ///< 0 = disabled, nonzero = enabled
} OrtOpenVINOProviderOptions;

struct OrtApi;
Expand Down
41 changes: 37 additions & 4 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
// Copyright (C) 2019-2022 Intel Corporation
// Licensed under the MIT License

#include <fstream>
#include <vector>
#include <string>
#include <memory>

#include "core/providers/shared_library/provider_api.h"

#include <inference_engine.hpp>
#include <fstream>

#include "contexts.h"
#include "backend_manager.h"
Expand Down Expand Up @@ -84,6 +89,26 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
subgraph_context_.has_dynamic_input_shape = false;

} else if (ModelHasSymbolicInputDims(subgraph)) {
subgraph_context_.has_dynamic_input_shape = true;
if (GetGlobalContext().device_type.find("MYRIAD") != std::string::npos) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims."
" Defering backend initialization and device_type is MYRIAD.";
}
if (GetGlobalContext().device_type.find("CPU") != std::string::npos) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims and "
<< "device_type is CPU.";
#if (defined OV_API_20)
if (GetGlobalContext().enable_dynamic_shapes) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
<< "Creating backend Dynamic Shapes";
concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, GetGlobalContext(), subgraph_context_);
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
<< "Backend created for graph " << subgraph_context_.subgraph_name;
}
#endif
}
} else if (ModelHasSymbolicInputDims(subgraph) &&
GetGlobalContext().device_type.find("GPU") != std::string::npos) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims. Defering backend initialization";
subgraph_context_.has_dynamic_input_shape = true;
} else {
Expand Down Expand Up @@ -236,7 +261,15 @@ BackendManager::ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_p
}

void BackendManager::Compute(Ort::CustomOpApi api, OrtKernelContext* context) {
if (subgraph_context_.has_dynamic_input_shape) {
bool use_dynamic_backend = true;
if (GetGlobalContext().enable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape &&
GetGlobalContext().device_type.find("CPU") != std::string::npos) {
#if (defined OV_API_20)
concrete_backend_->Infer(api, context);
use_dynamic_backend = false;
#endif
}
if (use_dynamic_backend && subgraph_context_.has_dynamic_input_shape) {
std::vector<std::vector<int64_t>> tensor_shapes = GetInputTensorShapes(api, context);
auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type);

Expand All @@ -251,9 +284,9 @@ void BackendManager::Compute(Ort::CustomOpApi api, OrtKernelContext* context) {
auto search = backend_map_.find(key);
if (search == backend_map_.end()) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
<< "Creating concrete backend for key: " << key;
<< "Creating concrete backend for key: " << key;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
<< "Backend created for graph " << subgraph_context_.subgraph_name;
<< "Backend created for graph " << subgraph_context_.subgraph_name;
auto modelproto_with_concrete_shapes = ReWriteInputShapeInfo(*model_proto_, tensor_shapes);
dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes,
GetGlobalContext(), subgraph_context_);
Expand Down
25 changes: 22 additions & 3 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -267,10 +267,29 @@ void BasicBackend::StartAsyncInference(Ort::CustomOpApi& ort, OrtKernelContext*
} else {
ORT_THROW(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
}
OVTensorPtr graph_input_blob;
graph_input_blob = infer_request->GetTensor(input_name);
size_t batch_slice_idx = 0;
FillInputBlob(graph_input_blob, batch_slice_idx, input_name, ort, context, subgraph_context_);
if (subgraph_context_.has_dynamic_input_shape &&
global_context_.enable_dynamic_shapes == true &&
global_context_.device_type.find("CPU") != std::string::npos) {
const OrtValue* tensor = ort.KernelContext_GetInput(context, subgraph_context_.input_names.at(input_name));
auto tensor_info = ort.GetTensorTypeAndShape(tensor);
auto tensor_shape = ort.GetTensorShape(tensor_info);
auto tensor_size = tensor_shape.size();
auto tensor_iter = 0;
ov::Shape input_tensor_shape = ov::Shape(tensor_size, 0);
for (auto i = tensor_shape.begin(); i != tensor_shape.end(); ++i) {
input_tensor_shape[tensor_iter] = *i;
tensor_iter+=1;
}
auto input = ie_cnn_network_->get_parameters().at(0);
OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
FillInputBlob(tensor_ptr, batch_slice_idx, input_name, ort, context, subgraph_context_);
infer_request->SetTensor(input_name, tensor_ptr);
} else {
OVTensorPtr graph_input_blob;
graph_input_blob = infer_request->GetTensor(input_name);
FillInputBlob(graph_input_blob, batch_slice_idx, input_name, ort, context, subgraph_context_);
}
input_idx++;
}
#else
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ struct GlobalContext {
bool enable_vpu_fast_compile = false;
bool use_compiled_network = false;
bool enable_opencl_throttling = false;
bool enable_dynamic_shapes = false;
size_t num_of_threads;
std::string device_type;
std::string precision_str;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
openvino_ep::BackendManager::GetGlobalContext().blob_dump_path = info.blob_dump_path_;
openvino_ep::BackendManager::GetGlobalContext().context = info.context_;
openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_;
openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_;


if ((int)info.num_of_threads_ <= 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,15 @@ struct OpenVINOExecutionProviderInfo {
std::string blob_dump_path_;
void* context_;
bool enable_opencl_throttling_;
bool enable_dynamic_shapes_;

explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads, bool use_compiled_network, std::string blob_dump_path, void* context, bool enable_opencl_throttling)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network), blob_dump_path_(blob_dump_path), context_(context), enable_opencl_throttling_(enable_opencl_throttling) {
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id,
size_t num_of_threads, bool use_compiled_network,
std::string blob_dump_path, void* context, bool enable_opencl_throttling,
bool enable_dynamic_shapes)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads),
use_compiled_network_(use_compiled_network), blob_dump_path_(blob_dump_path), context_(context),
enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
if (dev_type == "") {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
<< "No runtime device selection option provided.";
Expand Down Expand Up @@ -130,7 +136,7 @@ struct OpenVINOExecutionProviderInfo {
<< "Choosing Device: " << device_type_ << " , Precision: " << precision_;
}
OpenVINOExecutionProviderInfo() {
OpenVINOExecutionProviderInfo("", false, "", 0, false,"", NULL, false);
OpenVINOExecutionProviderInfo("", false, "", 0, false, "", NULL, false, false);
}
};

Expand Down
26 changes: 22 additions & 4 deletions onnxruntime/core/providers/openvino/openvino_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
const char* device_id, size_t num_of_threads,
bool use_compiled_network, const char* blob_dump_path, void* context,
bool enable_opencl_throttling)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network), context_(context), enable_opencl_throttling_(enable_opencl_throttling) {
bool enable_opencl_throttling, bool enable_dynamic_shapes)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads),
use_compiled_network_(use_compiled_network), context_(context),
enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
device_type_ = (device_type == nullptr) ? "" : device_type;
device_id_ = (device_id == nullptr) ? "" : device_id;
blob_dump_path_ = (blob_dump_path == nullptr) ? "" : blob_dump_path;
Expand All @@ -31,13 +33,25 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
std::string blob_dump_path_;
void* context_;
bool enable_opencl_throttling_;
bool enable_dynamic_shapes_;
};

// Builds the provider-info bundle from this factory's stored options
// (including the newly added enable_dynamic_shapes_ flag) and hands it to a
// freshly created OpenVINOExecutionProvider.
std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
  // Construct `info` exactly once with the full 9-argument option set; the
  // block previously carried both the old 8-argument and new 9-argument
  // constructions, i.e. a duplicate declaration of `info`.
  OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_,
                                     use_compiled_network_, blob_dump_path_, context_, enable_opencl_throttling_,
                                     enable_dynamic_shapes_);
  return std::make_unique<OpenVINOExecutionProvider>(info);
}

// Public factory entry point for the OpenVINO execution provider.
// Forwards every option verbatim — device selection, VPU fast compile,
// thread count, compiled-network blob handling, shared context, OpenCL
// throttling, and the dynamic-shapes switch — into a shared
// OpenVINOProviderFactory instance.
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads,
    bool use_compiled_network, const char* blob_dump_path, void* context, bool enable_opencl_throttling,
    bool enable_dynamic_shapes) {
  auto factory = std::make_shared<onnxruntime::OpenVINOProviderFactory>(
      device_type, enable_vpu_fast_compile, device_id, num_of_threads, use_compiled_network,
      blob_dump_path, context, enable_opencl_throttling, enable_dynamic_shapes);
  return factory;
}

} // namespace onnxruntime

namespace onnxruntime {
Expand All @@ -53,7 +67,11 @@ struct OpenVINO_Provider : Provider {

std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const void* void_params) override {
auto& params = *reinterpret_cast<const OrtOpenVINOProviderOptions*>(void_params);
return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads, params.use_compiled_network, params.blob_dump_path, params.context, params.enable_opencl_throttling);
return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile,
params.device_id, params.num_of_threads,
params.use_compiled_network, params.blob_dump_path,
params.context, params.enable_opencl_throttling,
params.enable_dynamic_shapes);
}

void Initialize() override {
Expand Down
12 changes: 10 additions & 2 deletions onnxruntime/python/onnxruntime_pybind_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -618,12 +618,20 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(

} else if (option.first == "enable_opencl_throttling") {
if (option.second == "True") {
params.use_compiled_network = true;
params.enable_opencl_throttling = true;
} else if (option.second == "False") {
params.use_compiled_network = false;
params.enable_opencl_throttling = false;
} else {
ORT_THROW("Invalid value passed for enable_opencl_throttling: ", option.second);
}
} else if (option.first == "enable_dynamic_shapes") {
if (option.second == "True") {
params.enable_dynamic_shapes = true;
} else if (option.second == "False") {
params.enable_dynamic_shapes = false;
} else {
ORT_THROW("Invalid value passed for enable_dynamic_shapes: ", option.second);
}
} else if (option.first == "device_id") {
params.device_id = option.second.c_str();
} else if (option.first == "num_of_threads") {
Expand Down
46 changes: 32 additions & 14 deletions onnxruntime/test/perftest/ort_test_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -253,13 +253,21 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
#endif
} else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) {
#ifdef USE_OPENVINO
std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.
bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speeds up the model's compilation to VPU device specific format.
std::string device_id = ""; // [device_id]: Selects a particular hardware device for inference.
size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of threads with this value at runtime.
bool use_compiled_network = false; // [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if exists.
std::string blob_dump_path = ""; // [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network(save/load blob) feature. This overrides the default path.
bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device (Reduces CPU Utilization when using GPU)
std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision
// with these values at runtime.
bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
// speeds up the model's compilation to VPU device specific format.
std::string device_id = ""; // [device_id]: Selects a particular hardware device for inference.
size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of
// threads with this value at runtime.
bool use_compiled_network = false; // [use_compiled_network]: Can be enabled to directly import pre-compiled
// blobs if exists.
std::string blob_dump_path = ""; // [blob_dump_path]: Explicitly specify the path where you would like to
// dump and load the blobs for the use_compiled_network(save/load blob)
// feature. This overrides the default path.
bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
// device (Reduces CPU Utilization when using GPU)
bool enable_dynamic_shapes = false; // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device)
#ifdef _MSC_VER
std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
#else
Expand Down Expand Up @@ -318,6 +326,15 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
} else {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_opencl_throttling' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "enable_dynamic_shapes") {
if (value == "true" || value == "True") {
enable_dynamic_shapes = true;
} else if (value == "false" || value == "False") {
enable_dynamic_shapes = false;
} else {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_dynamic_shapes' "
"should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "num_of_threads") {
std::stringstream sstream(value);
sstream >> num_of_threads;
Expand All @@ -331,13 +348,14 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
}
}
OrtOpenVINOProviderOptions options;
options.device_type = device_type.c_str(); // To set the device_type
options.device_id = device_id.c_str(); // To set the device_id
options.enable_vpu_fast_compile = enable_vpu_fast_compile; // To enable_vpu_fast_compile, default is false
options.num_of_threads = num_of_threads; // To set number of free InferRequests, default is 8
options.use_compiled_network = use_compiled_network; // To use_compiled_network, default is false
options.blob_dump_path = blob_dump_path.c_str(); // sets the blob_dump_path, default is ""
options.enable_opencl_throttling = enable_opencl_throttling; // Enables GPU Throttling (Reduces CPU Utilization)
options.device_type = device_type.c_str(); // To set the device_type
options.device_id = device_id.c_str(); // To set the device_id
options.enable_vpu_fast_compile = enable_vpu_fast_compile; // To enable_vpu_fast_compile, default is false
options.num_of_threads = num_of_threads; // To set number of free InferRequests, default is 8
options.use_compiled_network = use_compiled_network; // To use_compiled_network, default is false
options.blob_dump_path = blob_dump_path.c_str(); // sets the blob_dump_path, default is ""
options.enable_opencl_throttling = enable_opencl_throttling; // Enables GPU Throttling (Reduces CPU Utilization)
options.enable_dynamic_shapes = enable_dynamic_shapes; // Enables Dynamic Shapes feature
session_options.AppendExecutionProvider_OpenVINO(options);
#else
ORT_THROW("OpenVINO is not supported in this build\n");
Expand Down

0 comments on commit 61a74f2

Please sign in to comment.