From 61a74f2f4d65dbaa6e20b17a11ec66a5d55c5e7f Mon Sep 17 00:00:00 2001 From: sfatimar Date: Tue, 21 Jun 2022 20:33:58 +0530 Subject: [PATCH] Mohsin/enable dynamic shapes (#11867) * Add pypi build changes to latest Master * Add ORT training part of OV build * Disabling SqueezeOpTest.BadAxes * Add ONNXruntime branch ARG to Docker build * Changes to include file details versions * Commit File Version Updates * Change naming for linux build * Add fix for pylint format errors * Fix pylint warnings. * Enable Dynamic Shapes for OV_API_20 * Update requirements.txt whl version- internal_ci fix * Update backend_manager.cc MYRIAD Fix * Update wheel version in requirements.txt * Update backend_manager.cc * Update backend_manager.cc * Update backend_manager.cc * Update setup.py * Fix pylint warnings * Fix pylint warnings 2 * Update backend_manager.cc * Update backend_manager.cc * Update backend_manager.cc * Update backend_manager.cc * Update backend_manager.cc * Update backend_manager.cc * Update backend_manager.cc * Update backend_manager.cc Co-authored-by: Preetha Veeramalai Co-authored-by: mayavijx Co-authored-by: Sahar Fatima Co-authored-by: mohsinmx --- .../core/session/onnxruntime_c_api.h | 5 +- .../providers/openvino/backend_manager.cc | 41 +++++++++++++++-- .../openvino/backends/basic_backend.cc | 25 ++++++++-- .../core/providers/openvino/contexts.h | 1 + .../openvino/openvino_execution_provider.cc | 1 + .../openvino/openvino_execution_provider.h | 12 +++-- .../openvino/openvino_provider_factory.cc | 26 +++++++++-- .../python/onnxruntime_pybind_state.cc | 12 ++++- onnxruntime/test/perftest/ort_test_session.cc | 46 +++++++++++++------ 9 files changed, 138 insertions(+), 31 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 76533d874e164..ce8d83a18a33d 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -515,7 
+515,9 @@ typedef struct OrtMIGraphXProviderOptions { */ typedef struct OrtOpenVINOProviderOptions { #ifdef __cplusplus - OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{}, use_compiled_network{}, blob_dump_path{}, context{}, enable_opencl_throttling{} {} + OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, + num_of_threads{}, use_compiled_network{}, blob_dump_path{}, + context{}, enable_opencl_throttling{}, enable_dynamic_shapes{} {} #endif /** \brief Device type string * @@ -529,6 +531,7 @@ typedef struct OrtOpenVINOProviderOptions { const char* blob_dump_path; // path is set to empty by default void* context; unsigned char enable_opencl_throttling; ///< 0 = disabled, nonzero = enabled + unsigned char enable_dynamic_shapes; ///< 0 = disabled, nonzero = enabled } OrtOpenVINOProviderOptions; struct OrtApi; diff --git a/onnxruntime/core/providers/openvino/backend_manager.cc b/onnxruntime/core/providers/openvino/backend_manager.cc index acb323aec8905..4241fed947d2e 100644 --- a/onnxruntime/core/providers/openvino/backend_manager.cc +++ b/onnxruntime/core/providers/openvino/backend_manager.cc @@ -1,9 +1,14 @@ // Copyright (C) 2019-2022 Intel Corporation // Licensed under the MIT License +#include +#include +#include +#include + #include "core/providers/shared_library/provider_api.h" + #include -#include #include "contexts.h" #include "backend_manager.h" @@ -84,6 +89,26 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node, subgraph_context_.has_dynamic_input_shape = false; } else if (ModelHasSymbolicInputDims(subgraph)) { + subgraph_context_.has_dynamic_input_shape = true; + if (GetGlobalContext().device_type.find("MYRIAD") != std::string::npos) { + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims." 
+ " Defering backend initialization and device_type is MYRIAD."; + } + if (GetGlobalContext().device_type.find("CPU") != std::string::npos) { + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims and " + << "device_type is CPU."; + #if (defined OV_API_20) + if (GetGlobalContext().enable_dynamic_shapes) { + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. " + << "Creating backend Dynamic Shapes"; + concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, GetGlobalContext(), subgraph_context_); + LOGS_DEFAULT(INFO) << "[OpenVINO-EP] " + << "Backend created for graph " << subgraph_context_.subgraph_name; + } + #endif + } + } else if (ModelHasSymbolicInputDims(subgraph) && + GetGlobalContext().device_type.find("GPU") != std::string::npos) { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims. Defering backend initialization"; subgraph_context_.has_dynamic_input_shape = true; } else { @@ -236,7 +261,15 @@ BackendManager::ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_p } void BackendManager::Compute(Ort::CustomOpApi api, OrtKernelContext* context) { - if (subgraph_context_.has_dynamic_input_shape) { + bool use_dynamic_backend = true; + if (GetGlobalContext().enable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape && + GetGlobalContext().device_type.find("CPU") != std::string::npos) { + #if (defined OV_API_20) + concrete_backend_->Infer(api, context); + use_dynamic_backend = false; + #endif + } + if (use_dynamic_backend && subgraph_context_.has_dynamic_input_shape) { std::vector> tensor_shapes = GetInputTensorShapes(api, context); auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type); @@ -251,9 +284,9 @@ void BackendManager::Compute(Ort::CustomOpApi api, OrtKernelContext* context) { auto search = backend_map_.find(key); if (search == backend_map_.end()) { LOGS_DEFAULT(INFO) << "[OpenVINO-EP] " - << "Creating concrete backend for key: " << key; + << "Creating concrete 
backend for key: " << key; LOGS_DEFAULT(INFO) << "[OpenVINO-EP] " - << "Backend created for graph " << subgraph_context_.subgraph_name; + << "Backend created for graph " << subgraph_context_.subgraph_name; auto modelproto_with_concrete_shapes = ReWriteInputShapeInfo(*model_proto_, tensor_shapes); dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes, GetGlobalContext(), subgraph_context_); diff --git a/onnxruntime/core/providers/openvino/backends/basic_backend.cc b/onnxruntime/core/providers/openvino/backends/basic_backend.cc index 5459a7dd1d82f..7ce76ad6f6f5f 100644 --- a/onnxruntime/core/providers/openvino/backends/basic_backend.cc +++ b/onnxruntime/core/providers/openvino/backends/basic_backend.cc @@ -267,10 +267,29 @@ void BasicBackend::StartAsyncInference(Ort::CustomOpApi& ort, OrtKernelContext* } else { ORT_THROW(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names"); } - OVTensorPtr graph_input_blob; - graph_input_blob = infer_request->GetTensor(input_name); size_t batch_slice_idx = 0; - FillInputBlob(graph_input_blob, batch_slice_idx, input_name, ort, context, subgraph_context_); + if (subgraph_context_.has_dynamic_input_shape && + global_context_.enable_dynamic_shapes == true && + global_context_.device_type.find("CPU") != std::string::npos) { + const OrtValue* tensor = ort.KernelContext_GetInput(context, subgraph_context_.input_names.at(input_name)); + auto tensor_info = ort.GetTensorTypeAndShape(tensor); + auto tensor_shape = ort.GetTensorShape(tensor_info); + auto tensor_size = tensor_shape.size(); + auto tensor_iter = 0; + ov::Shape input_tensor_shape = ov::Shape(tensor_size, 0); + for (auto i = tensor_shape.begin(); i != tensor_shape.end(); ++i) { + input_tensor_shape[tensor_iter] = *i; + tensor_iter+=1; + } + auto input = ie_cnn_network_->get_parameters().at(0); + OVTensorPtr tensor_ptr = std::make_shared(input->get_element_type(), 
input_tensor_shape); + FillInputBlob(tensor_ptr, batch_slice_idx, input_name, ort, context, subgraph_context_); + infer_request->SetTensor(input_name, tensor_ptr); + } else { + OVTensorPtr graph_input_blob; + graph_input_blob = infer_request->GetTensor(input_name); + FillInputBlob(graph_input_blob, batch_slice_idx, input_name, ort, context, subgraph_context_); + } input_idx++; } #else diff --git a/onnxruntime/core/providers/openvino/contexts.h b/onnxruntime/core/providers/openvino/contexts.h index 171a0b5b174eb..683375802cedb 100644 --- a/onnxruntime/core/providers/openvino/contexts.h +++ b/onnxruntime/core/providers/openvino/contexts.h @@ -15,6 +15,7 @@ struct GlobalContext { bool enable_vpu_fast_compile = false; bool use_compiled_network = false; bool enable_opencl_throttling = false; + bool enable_dynamic_shapes = false; size_t num_of_threads; std::string device_type; std::string precision_str; diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc index 8fb2c499210d3..16bd72117aa97 100644 --- a/onnxruntime/core/providers/openvino/openvino_execution_provider.cc +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.cc @@ -20,6 +20,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv openvino_ep::BackendManager::GetGlobalContext().blob_dump_path = info.blob_dump_path_; openvino_ep::BackendManager::GetGlobalContext().context = info.context_; openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_; + openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_; if ((int)info.num_of_threads_ <= 0) { diff --git a/onnxruntime/core/providers/openvino/openvino_execution_provider.h b/onnxruntime/core/providers/openvino/openvino_execution_provider.h index 8a3e65cbeb4a5..f8b949f4c3dc9 100644 --- 
a/onnxruntime/core/providers/openvino/openvino_execution_provider.h +++ b/onnxruntime/core/providers/openvino/openvino_execution_provider.h @@ -60,9 +60,15 @@ struct OpenVINOExecutionProviderInfo { std::string blob_dump_path_; void* context_; bool enable_opencl_throttling_; + bool enable_dynamic_shapes_; - explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads, bool use_compiled_network, std::string blob_dump_path, void* context, bool enable_opencl_throttling) - : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network), blob_dump_path_(blob_dump_path), context_(context), enable_opencl_throttling_(enable_opencl_throttling) { + explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, + size_t num_of_threads, bool use_compiled_network, + std::string blob_dump_path, void* context, bool enable_opencl_throttling, + bool enable_dynamic_shapes) + : enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), + use_compiled_network_(use_compiled_network), blob_dump_path_(blob_dump_path), context_(context), + enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) { if (dev_type == "") { LOGS_DEFAULT(INFO) << "[OpenVINO-EP]" << "No runtime device selection option provided."; @@ -130,7 +136,7 @@ struct OpenVINOExecutionProviderInfo { << "Choosing Device: " << device_type_ << " , Precision: " << precision_; } OpenVINOExecutionProviderInfo() { - OpenVINOExecutionProviderInfo("", false, "", 0, false,"", NULL, false); + OpenVINOExecutionProviderInfo("", false, "", 0, false, "", NULL, false, false); } }; diff --git a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc index 
5510391decafd..01f25526b5494 100644 --- a/onnxruntime/core/providers/openvino/openvino_provider_factory.cc +++ b/onnxruntime/core/providers/openvino/openvino_provider_factory.cc @@ -11,8 +11,10 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory { OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, bool use_compiled_network, const char* blob_dump_path, void* context, - bool enable_opencl_throttling) - : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network), context_(context), enable_opencl_throttling_(enable_opencl_throttling) { + bool enable_opencl_throttling, bool enable_dynamic_shapes) + : enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), + use_compiled_network_(use_compiled_network), context_(context), + enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) { device_type_ = (device_type == nullptr) ? "" : device_type; device_id_ = (device_id == nullptr) ? "" : device_id; blob_dump_path_ = (blob_dump_path == nullptr) ? 
"" : blob_dump_path; @@ -31,13 +33,25 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory { std::string blob_dump_path_; void* context_; bool enable_opencl_throttling_; + bool enable_dynamic_shapes_; }; std::unique_ptr OpenVINOProviderFactory::CreateProvider() { - OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_, use_compiled_network_, blob_dump_path_, context_, enable_opencl_throttling_); + OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_, + use_compiled_network_, blob_dump_path_, context_, enable_opencl_throttling_, + enable_dynamic_shapes_); return std::make_unique(info); } +std::shared_ptr CreateExecutionProviderFactory_OpenVINO( + const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads, + bool use_compiled_network, const char* blob_dump_path, void * context, bool enable_opencl_throttling, + bool enable_dynamic_shapes) { + return std::make_shared(device_type, enable_vpu_fast_compile, + device_id, num_of_threads, use_compiled_network, blob_dump_path, context, enable_opencl_throttling, + enable_dynamic_shapes); +} + } // namespace onnxruntime namespace onnxruntime { @@ -53,7 +67,11 @@ struct OpenVINO_Provider : Provider { std::shared_ptr CreateExecutionProviderFactory(const void* void_params) override { auto& params = *reinterpret_cast(void_params); - return std::make_shared(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads, params.use_compiled_network, params.blob_dump_path, params.context, params.enable_opencl_throttling); + return std::make_shared(params.device_type, params.enable_vpu_fast_compile, + params.device_id, params.num_of_threads, + params.use_compiled_network, params.blob_dump_path, + params.context, params.enable_opencl_throttling, + params.enable_dynamic_shapes); } void Initialize() override { diff --git 
a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index 72b0ed284eabd..94ae844796052 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -618,12 +618,20 @@ std::unique_ptr CreateExecutionProviderInstance( } else if (option.first == "enable_opencl_throttling") { if (option.second == "True") { - params.use_compiled_network = true; + params.enable_opencl_throttling = true; } else if (option.second == "False") { - params.use_compiled_network = false; + params.enable_opencl_throttling = false; } else { ORT_THROW("Invalid value passed for enable_opencl_throttling: ", option.second); } + } else if (option.first == "enable_dynamic_shapes") { + if (option.second == "True") { + params.enable_dynamic_shapes = true; + } else if (option.second == "False") { + params.enable_dynamic_shapes = false; + } else { + ORT_THROW("Invalid value passed for enable_dynamic_shapes: ", option.second); + } } else if (option.first == "device_id") { params.device_id = option.second.c_str(); } else if (option.first == "num_of_threads") { diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index c97343b2933fb..d9a6a13c3f976 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -253,13 +253,21 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device #endif } else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) { #ifdef USE_OPENVINO - std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision with these values at runtime. - bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speeds up the model's compilation to VPU device specific format. - std::string device_id = ""; // [device_id]: Selects a particular hardware device for inference. 
- size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of threads with this value at runtime. - bool use_compiled_network = false; // [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if exists. - std::string blob_dump_path = ""; // [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network(save/load blob) feature. This overrides the default path. - bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device (Reduces CPU Utilization when using GPU) + std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision + // with these values at runtime. + bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to + // speeds up the model's compilation to VPU device specific format. + std::string device_id = ""; // [device_id]: Selects a particular hardware device for inference. + size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of + // threads with this value at runtime. + bool use_compiled_network = false; // [use_compiled_network]: Can be enabled to directly import pre-compiled + // blobs if exists. + std::string blob_dump_path = ""; // [blob_dump_path]: Explicitly specify the path where you would like to + // dump and load the blobs for the use_compiled_network(save/load blob) + // feature. This overrides the default path. 
+ bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU + // device (Reduces CPU Utilization when using GPU) + bool enable_dynamic_shapes = false; // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device #ifdef _MSC_VER std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string); #else @@ -318,6 +326,15 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } else { ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_opencl_throttling' should be a boolean i.e. true or false. Default value is false.\n"); } + } else if (key == "enable_dynamic_shapes") { + if (value == "true" || value == "True") { + enable_dynamic_shapes = true; + } else if (value == "false" || value == "False") { + enable_dynamic_shapes = false; + } else { + ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_dynamic_shapes' " + "should be a boolean i.e. true or false. 
Default value is false.\n"); + } } else if (key == "num_of_threads") { std::stringstream sstream(value); sstream >> num_of_threads; @@ -331,13 +348,14 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device } } OrtOpenVINOProviderOptions options; - options.device_type = device_type.c_str(); // To set the device_type - options.device_id = device_id.c_str(); // To set the device_id - options.enable_vpu_fast_compile = enable_vpu_fast_compile; // To enable_vpu_fast_compile, default is false - options.num_of_threads = num_of_threads; // To set number of free InferRequests, default is 8 - options.use_compiled_network = use_compiled_network; // To use_compiled_network, default is false - options.blob_dump_path = blob_dump_path.c_str(); // sets the blob_dump_path, default is "" - options.enable_opencl_throttling = enable_opencl_throttling; // Enables GPU Throttling (Reduces CPU Utilization) + options.device_type = device_type.c_str(); // To set the device_type + options.device_id = device_id.c_str(); // To set the device_id + options.enable_vpu_fast_compile = enable_vpu_fast_compile; // To enable_vpu_fast_compile, default is false + options.num_of_threads = num_of_threads; // To set number of free InferRequests, default is 8 + options.use_compiled_network = use_compiled_network; // To use_compiled_network, default is false + options.blob_dump_path = blob_dump_path.c_str(); // sets the blob_dump_path, default is "" + options.enable_opencl_throttling = enable_opencl_throttling; // Enables GPU Throttling (Reduces CPU Utilization) + options.enable_dynamic_shapes = enable_dynamic_shapes; // Enables Dynamic Shapes feature session_options.AppendExecutionProvider_OpenVINO(options); #else ORT_THROW("OpenVINO is not supported in this build\n");