Skip to content

Commit

Permalink
Mohsin/enable dynamic shapes (#11867)
Browse files Browse the repository at this point in the history
* Add pypi build changes to latest Master

* Add ORT training part of OV build

* Disabling SqueezeOpTest.BadAxes

* Add ONNXruntime branch ARG to Docker build

* Changes to include file details versions

* Commit File Version Updates

* Change naming for linux build

* Add fix for pylint format errors

* Fix pylint warnings.

* Enable Dynamic Shapes for OV_API_20

* Update requirements.txt whl version- internal_ci fix

* Update backend_manager.cc MYRIAD Fix

* Update wheel version in requirements.txt

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

* Update setup.py

* Fix pylint warnings

* Fix pylint warnings 2

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

* Update backend_manager.cc

Co-authored-by: Preetha Veeramalai <preetha.veeramalai@intel.com>
Co-authored-by: mayavijx <mayax.vijayan@intel.com>
Co-authored-by: Sahar Fatima <sfatima.3001@gmail.com>
Co-authored-by: mohsinmx <mohsinx.mohammad@intel.com>
  • Loading branch information
5 people authored Jun 21, 2022
1 parent b20daed commit 61a74f2
Show file tree
Hide file tree
Showing 9 changed files with 138 additions and 31 deletions.
5 changes: 4 additions & 1 deletion include/onnxruntime/core/session/onnxruntime_c_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -515,7 +515,9 @@ typedef struct OrtMIGraphXProviderOptions {
*/
typedef struct OrtOpenVINOProviderOptions {
#ifdef __cplusplus
OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{}, num_of_threads{}, use_compiled_network{}, blob_dump_path{}, context{}, enable_opencl_throttling{} {}
OrtOpenVINOProviderOptions() : device_type{}, enable_vpu_fast_compile{}, device_id{},
num_of_threads{}, use_compiled_network{}, blob_dump_path{},
context{}, enable_opencl_throttling{}, enable_dynamic_shapes{} {}
#endif
/** \brief Device type string
*
Expand All @@ -529,6 +531,7 @@ typedef struct OrtOpenVINOProviderOptions {
const char* blob_dump_path; // path is set to empty by default
void* context;
unsigned char enable_opencl_throttling; ///< 0 = disabled, nonzero = enabled
unsigned char enable_dynamic_shapes; ///< 0 = disabled, nonzero = enabled
} OrtOpenVINOProviderOptions;

struct OrtApi;
Expand Down
41 changes: 37 additions & 4 deletions onnxruntime/core/providers/openvino/backend_manager.cc
Original file line number Diff line number Diff line change
@@ -1,9 +1,14 @@
// Copyright (C) 2019-2022 Intel Corporation
// Licensed under the MIT License

#include <fstream>
#include <vector>
#include <string>
#include <memory>

#include "core/providers/shared_library/provider_api.h"

#include <inference_engine.hpp>
#include <fstream>

#include "contexts.h"
#include "backend_manager.h"
Expand Down Expand Up @@ -84,6 +89,26 @@ BackendManager::BackendManager(const onnxruntime::Node& fused_node,
subgraph_context_.has_dynamic_input_shape = false;

} else if (ModelHasSymbolicInputDims(subgraph)) {
subgraph_context_.has_dynamic_input_shape = true;
if (GetGlobalContext().device_type.find("MYRIAD") != std::string::npos) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims."
" Defering backend initialization and device_type is MYRIAD.";
}
if (GetGlobalContext().device_type.find("CPU") != std::string::npos) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims and "
<< "device_type is CPU.";
#if (defined OV_API_20)
if (GetGlobalContext().enable_dynamic_shapes) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Starting backend initialization. "
<< "Creating backend Dynamic Shapes";
concrete_backend_ = BackendFactory::MakeBackend(*model_proto_, GetGlobalContext(), subgraph_context_);
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
<< "Backend created for graph " << subgraph_context_.subgraph_name;
}
#endif
}
} else if (ModelHasSymbolicInputDims(subgraph) &&
GetGlobalContext().device_type.find("GPU") != std::string::npos) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] Model has symbolic input dims. Defering backend initialization";
subgraph_context_.has_dynamic_input_shape = true;
} else {
Expand Down Expand Up @@ -236,7 +261,15 @@ BackendManager::ReWriteBatchDimWithOne(const ONNX_NAMESPACE::ModelProto& model_p
}

void BackendManager::Compute(Ort::CustomOpApi api, OrtKernelContext* context) {
if (subgraph_context_.has_dynamic_input_shape) {
bool use_dynamic_backend = true;
if (GetGlobalContext().enable_dynamic_shapes && subgraph_context_.has_dynamic_input_shape &&
GetGlobalContext().device_type.find("CPU") != std::string::npos) {
#if (defined OV_API_20)
concrete_backend_->Infer(api, context);
use_dynamic_backend = false;
#endif
}
if (use_dynamic_backend && subgraph_context_.has_dynamic_input_shape) {
std::vector<std::vector<int64_t>> tensor_shapes = GetInputTensorShapes(api, context);
auto key = MakeMapKeyString(tensor_shapes, GetGlobalContext().device_type);

Expand All @@ -251,9 +284,9 @@ void BackendManager::Compute(Ort::CustomOpApi api, OrtKernelContext* context) {
auto search = backend_map_.find(key);
if (search == backend_map_.end()) {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
<< "Creating concrete backend for key: " << key;
<< "Creating concrete backend for key: " << key;
LOGS_DEFAULT(INFO) << "[OpenVINO-EP] "
<< "Backend created for graph " << subgraph_context_.subgraph_name;
<< "Backend created for graph " << subgraph_context_.subgraph_name;
auto modelproto_with_concrete_shapes = ReWriteInputShapeInfo(*model_proto_, tensor_shapes);
dynamic_backend = BackendFactory::MakeBackend(*modelproto_with_concrete_shapes,
GetGlobalContext(), subgraph_context_);
Expand Down
25 changes: 22 additions & 3 deletions onnxruntime/core/providers/openvino/backends/basic_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -267,10 +267,29 @@ void BasicBackend::StartAsyncInference(Ort::CustomOpApi& ort, OrtKernelContext*
} else {
ORT_THROW(log_tag + "Input names mismatch between OpenVINO and ONNX. " + onnx_input_name + " doesn't exist in the list of OpenVINO input tensor names");
}
OVTensorPtr graph_input_blob;
graph_input_blob = infer_request->GetTensor(input_name);
size_t batch_slice_idx = 0;
FillInputBlob(graph_input_blob, batch_slice_idx, input_name, ort, context, subgraph_context_);
if (subgraph_context_.has_dynamic_input_shape &&
global_context_.enable_dynamic_shapes == true &&
global_context_.device_type.find("CPU") != std::string::npos) {
const OrtValue* tensor = ort.KernelContext_GetInput(context, subgraph_context_.input_names.at(input_name));
auto tensor_info = ort.GetTensorTypeAndShape(tensor);
auto tensor_shape = ort.GetTensorShape(tensor_info);
auto tensor_size = tensor_shape.size();
auto tensor_iter = 0;
ov::Shape input_tensor_shape = ov::Shape(tensor_size, 0);
for (auto i = tensor_shape.begin(); i != tensor_shape.end(); ++i) {
input_tensor_shape[tensor_iter] = *i;
tensor_iter+=1;
}
auto input = ie_cnn_network_->get_parameters().at(0);
OVTensorPtr tensor_ptr = std::make_shared<ov::Tensor>(input->get_element_type(), input_tensor_shape);
FillInputBlob(tensor_ptr, batch_slice_idx, input_name, ort, context, subgraph_context_);
infer_request->SetTensor(input_name, tensor_ptr);
} else {
OVTensorPtr graph_input_blob;
graph_input_blob = infer_request->GetTensor(input_name);
FillInputBlob(graph_input_blob, batch_slice_idx, input_name, ort, context, subgraph_context_);
}
input_idx++;
}
#else
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/providers/openvino/contexts.h
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ struct GlobalContext {
bool enable_vpu_fast_compile = false;
bool use_compiled_network = false;
bool enable_opencl_throttling = false;
bool enable_dynamic_shapes = false;
size_t num_of_threads;
std::string device_type;
std::string precision_str;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ OpenVINOExecutionProvider::OpenVINOExecutionProvider(const OpenVINOExecutionProv
openvino_ep::BackendManager::GetGlobalContext().blob_dump_path = info.blob_dump_path_;
openvino_ep::BackendManager::GetGlobalContext().context = info.context_;
openvino_ep::BackendManager::GetGlobalContext().enable_opencl_throttling = info.enable_opencl_throttling_;
openvino_ep::BackendManager::GetGlobalContext().enable_dynamic_shapes = info.enable_dynamic_shapes_;


if ((int)info.num_of_threads_ <= 0) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,15 @@ struct OpenVINOExecutionProviderInfo {
std::string blob_dump_path_;
void* context_;
bool enable_opencl_throttling_;
bool enable_dynamic_shapes_;

explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id, size_t num_of_threads, bool use_compiled_network, std::string blob_dump_path, void* context, bool enable_opencl_throttling)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network), blob_dump_path_(blob_dump_path), context_(context), enable_opencl_throttling_(enable_opencl_throttling) {
explicit OpenVINOExecutionProviderInfo(std::string dev_type, bool enable_vpu_fast_compile, std::string dev_id,
size_t num_of_threads, bool use_compiled_network,
std::string blob_dump_path, void* context, bool enable_opencl_throttling,
bool enable_dynamic_shapes)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), device_id_(dev_id), num_of_threads_(num_of_threads),
use_compiled_network_(use_compiled_network), blob_dump_path_(blob_dump_path), context_(context),
enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
if (dev_type == "") {
LOGS_DEFAULT(INFO) << "[OpenVINO-EP]"
<< "No runtime device selection option provided.";
Expand Down Expand Up @@ -130,7 +136,7 @@ struct OpenVINOExecutionProviderInfo {
<< "Choosing Device: " << device_type_ << " , Precision: " << precision_;
}
OpenVINOExecutionProviderInfo() {
OpenVINOExecutionProviderInfo("", false, "", 0, false,"", NULL, false);
OpenVINOExecutionProviderInfo("", false, "", 0, false, "", NULL, false, false);
}
};

Expand Down
26 changes: 22 additions & 4 deletions onnxruntime/core/providers/openvino/openvino_provider_factory.cc
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,10 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
OpenVINOProviderFactory(const char* device_type, bool enable_vpu_fast_compile,
const char* device_id, size_t num_of_threads,
bool use_compiled_network, const char* blob_dump_path, void* context,
bool enable_opencl_throttling)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads), use_compiled_network_(use_compiled_network), context_(context), enable_opencl_throttling_(enable_opencl_throttling) {
bool enable_opencl_throttling, bool enable_dynamic_shapes)
: enable_vpu_fast_compile_(enable_vpu_fast_compile), num_of_threads_(num_of_threads),
use_compiled_network_(use_compiled_network), context_(context),
enable_opencl_throttling_(enable_opencl_throttling), enable_dynamic_shapes_(enable_dynamic_shapes) {
device_type_ = (device_type == nullptr) ? "" : device_type;
device_id_ = (device_id == nullptr) ? "" : device_id;
blob_dump_path_ = (blob_dump_path == nullptr) ? "" : blob_dump_path;
Expand All @@ -31,13 +33,25 @@ struct OpenVINOProviderFactory : IExecutionProviderFactory {
std::string blob_dump_path_;
void* context_;
bool enable_opencl_throttling_;
bool enable_dynamic_shapes_;
};

// Builds the provider-info bundle from this factory's stored options
// (including the newly added enable_dynamic_shapes_ flag) and hands it to a
// freshly created OpenVINOExecutionProvider.
std::unique_ptr<IExecutionProvider> OpenVINOProviderFactory::CreateProvider() {
  // Construct `info` exactly once with the full 9-argument option set; the
  // block previously carried both the old 8-argument and new 9-argument
  // constructions, i.e. a duplicate declaration of `info`.
  OpenVINOExecutionProviderInfo info(device_type_, enable_vpu_fast_compile_, device_id_, num_of_threads_,
                                     use_compiled_network_, blob_dump_path_, context_, enable_opencl_throttling_,
                                     enable_dynamic_shapes_);
  return std::make_unique<OpenVINOExecutionProvider>(info);
}

// Public factory entry point for the OpenVINO execution provider.
// Forwards every option verbatim — device selection, VPU fast compile,
// thread count, compiled-network blob handling, shared context, OpenCL
// throttling, and the dynamic-shapes switch — into a shared
// OpenVINOProviderFactory instance.
std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory_OpenVINO(
    const char* device_type, bool enable_vpu_fast_compile, const char* device_id, size_t num_of_threads,
    bool use_compiled_network, const char* blob_dump_path, void* context, bool enable_opencl_throttling,
    bool enable_dynamic_shapes) {
  auto factory = std::make_shared<onnxruntime::OpenVINOProviderFactory>(
      device_type, enable_vpu_fast_compile, device_id, num_of_threads, use_compiled_network,
      blob_dump_path, context, enable_opencl_throttling, enable_dynamic_shapes);
  return factory;
}

} // namespace onnxruntime

namespace onnxruntime {
Expand All @@ -53,7 +67,11 @@ struct OpenVINO_Provider : Provider {

std::shared_ptr<IExecutionProviderFactory> CreateExecutionProviderFactory(const void* void_params) override {
auto& params = *reinterpret_cast<const OrtOpenVINOProviderOptions*>(void_params);
return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile, params.device_id, params.num_of_threads, params.use_compiled_network, params.blob_dump_path, params.context, params.enable_opencl_throttling);
return std::make_shared<OpenVINOProviderFactory>(params.device_type, params.enable_vpu_fast_compile,
params.device_id, params.num_of_threads,
params.use_compiled_network, params.blob_dump_path,
params.context, params.enable_opencl_throttling,
params.enable_dynamic_shapes);
}

void Initialize() override {
Expand Down
12 changes: 10 additions & 2 deletions onnxruntime/python/onnxruntime_pybind_state.cc
Original file line number Diff line number Diff line change
Expand Up @@ -618,12 +618,20 @@ std::unique_ptr<IExecutionProvider> CreateExecutionProviderInstance(

} else if (option.first == "enable_opencl_throttling") {
if (option.second == "True") {
params.use_compiled_network = true;
params.enable_opencl_throttling = true;
} else if (option.second == "False") {
params.use_compiled_network = false;
params.enable_opencl_throttling = false;
} else {
ORT_THROW("Invalid value passed for enable_opencl_throttling: ", option.second);
}
} else if (option.first == "enable_dynamic_shapes") {
if (option.second == "True") {
params.enable_dynamic_shapes = true;
} else if (option.second == "False") {
params.enable_dynamic_shapes = false;
} else {
ORT_THROW("Invalid value passed for enable_dynamic_shapes: ", option.second);
}
} else if (option.first == "device_id") {
params.device_id = option.second.c_str();
} else if (option.first == "num_of_threads") {
Expand Down
46 changes: 32 additions & 14 deletions onnxruntime/test/perftest/ort_test_session.cc
Original file line number Diff line number Diff line change
Expand Up @@ -253,13 +253,21 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
#endif
} else if (provider_name == onnxruntime::kOpenVINOExecutionProvider) {
#ifdef USE_OPENVINO
std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision with these values at runtime.
bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to speeds up the model's compilation to VPU device specific format.
std::string device_id = ""; // [device_id]: Selects a particular hardware device for inference.
size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of threads with this value at runtime.
bool use_compiled_network = false; // [use_compiled_network]: Can be enabled to directly import pre-compiled blobs if exists.
std::string blob_dump_path = ""; // [blob_dump_path]: Explicitly specify the path where you would like to dump and load the blobs for the use_compiled_network(save/load blob) feature. This overrides the default path.
bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU device (Reduces CPU Utilization when using GPU)
std::string device_type = ""; // [device_type]: Overrides the accelerator hardware type and precision
// with these values at runtime.
bool enable_vpu_fast_compile = false; // [enable_vpu_fast_compile]: Fast-compile may be optionally enabled to
// speeds up the model's compilation to VPU device specific format.
std::string device_id = ""; // [device_id]: Selects a particular hardware device for inference.
size_t num_of_threads = 8; // [num_of_threads]: Overrides the accelerator default value of number of
// threads with this value at runtime.
bool use_compiled_network = false; // [use_compiled_network]: Can be enabled to directly import pre-compiled
// blobs if exists.
std::string blob_dump_path = ""; // [blob_dump_path]: Explicitly specify the path where you would like to
// dump and load the blobs for the use_compiled_network(save/load blob)
// feature. This overrides the default path.
bool enable_opencl_throttling = false; // [enable_opencl_throttling]: Enables OpenCL queue throttling for GPU
// device (Reduces CPU Utilization when using GPU)
bool enable_dynamic_shapes = false; // [enable_dynamic_shapes]: Enables Dynamic Shapes feature for CPU device)
#ifdef _MSC_VER
std::string ov_string = ToUTF8String(performance_test_config.run_config.ep_runtime_config_string);
#else
Expand Down Expand Up @@ -318,6 +326,15 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
} else {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_opencl_throttling' should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "enable_dynamic_shapes") {
if (value == "true" || value == "True") {
enable_dynamic_shapes = true;
} else if (value == "false" || value == "False") {
enable_dynamic_shapes = false;
} else {
ORT_THROW("[ERROR] [OpenVINO] The value for the key 'enable_dynamic_shapes' "
"should be a boolean i.e. true or false. Default value is false.\n");
}
} else if (key == "num_of_threads") {
std::stringstream sstream(value);
sstream >> num_of_threads;
Expand All @@ -331,13 +348,14 @@ OnnxRuntimeTestSession::OnnxRuntimeTestSession(Ort::Env& env, std::random_device
}
}
OrtOpenVINOProviderOptions options;
options.device_type = device_type.c_str(); // To set the device_type
options.device_id = device_id.c_str(); // To set the device_id
options.enable_vpu_fast_compile = enable_vpu_fast_compile; // To enable_vpu_fast_compile, default is false
options.num_of_threads = num_of_threads; // To set number of free InferRequests, default is 8
options.use_compiled_network = use_compiled_network; // To use_compiled_network, default is false
options.blob_dump_path = blob_dump_path.c_str(); // sets the blob_dump_path, default is ""
options.enable_opencl_throttling = enable_opencl_throttling; // Enables GPU Throttling (Reduces CPU Utilization)
options.device_type = device_type.c_str(); // To set the device_type
options.device_id = device_id.c_str(); // To set the device_id
options.enable_vpu_fast_compile = enable_vpu_fast_compile; // To enable_vpu_fast_compile, default is false
options.num_of_threads = num_of_threads; // To set number of free InferRequests, default is 8
options.use_compiled_network = use_compiled_network; // To use_compiled_network, default is false
options.blob_dump_path = blob_dump_path.c_str(); // sets the blob_dump_path, default is ""
options.enable_opencl_throttling = enable_opencl_throttling; // Enables GPU Throttling (Reduces CPU Utilization)
options.enable_dynamic_shapes = enable_dynamic_shapes; // Enables Dynamic Shapes feature
session_options.AppendExecutionProvider_OpenVINO(options);
#else
ORT_THROW("OpenVINO is not supported in this build\n");
Expand Down

0 comments on commit 61a74f2

Please sign in to comment.