From 8506fc965cf661be649265f837faf1d4cb3b508f Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 26 Jun 2020 17:16:56 -0700 Subject: [PATCH 01/39] Implement Create/Release Allocator --- onnxruntime/core/session/device_allocator.cc | 62 +++++++++++++++++++ onnxruntime/core/session/inference_session.cc | 4 ++ onnxruntime/core/session/inference_session.h | 8 +++ onnxruntime/core/session/onnxruntime_c_api.cc | 3 + onnxruntime/core/session/ort_apis.h | 4 ++ 5 files changed, 81 insertions(+) create mode 100644 onnxruntime/core/session/device_allocator.cc diff --git a/onnxruntime/core/session/device_allocator.cc b/onnxruntime/core/session/device_allocator.cc new file mode 100644 index 0000000000000..2140cb589f7e2 --- /dev/null +++ b/onnxruntime/core/session/device_allocator.cc @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/framework/allocator.h" +#include "core/framework/utils.h" +#include "core/session/inference_session.h" +#include "core/session/onnxruntime_cxx_api.h" +#include "core/session/ort_apis.h" +#include <memory> + +struct OrtAllocatorForDevice : public OrtAllocator { + + explicit OrtAllocatorForDevice(onnxruntime::AllocatorPtr&& dev_allocator) + : device_allocator_(std::move(dev_allocator)) { + OrtAllocator::version = ORT_API_VERSION; + OrtAllocator::Alloc = [](OrtAllocator* this_, size_t size) { return static_cast<OrtAllocatorForDevice*>(this_)->Alloc(size); }; + OrtAllocator::Free = [](OrtAllocator* this_, void* p) { static_cast<OrtAllocatorForDevice*>(this_)->Free(p); }; + OrtAllocator::Info = [](const OrtAllocator* this_) { return static_cast<const OrtAllocatorForDevice*>(this_)->Info(); }; + } + + ~OrtAllocatorForDevice() = default; + + void* Alloc(size_t size) const { + return device_allocator_->Alloc(size); + } + void Free(void* p) const { + device_allocator_->Free(p); + } + + const OrtMemoryInfo* Info() const { + return &device_allocator_->Info(); + } + + OrtAllocatorForDevice(const OrtAllocatorForDevice&) = delete; + OrtAllocatorForDevice& operator=(const OrtAllocatorForDevice&) = delete; + + private: + onnxruntime::AllocatorPtr device_allocator_; +}; + +#define API_IMPL_BEGIN try { +#define API_IMPL_END \ } \ catch (const std::exception& ex) { \ return OrtApis::CreateStatus(ORT_RUNTIME_EXCEPTION, ex.what()); \ } + +ORT_API_STATUS_IMPL(OrtApis::CreateDeviceAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, _Outptr_ OrtAllocator** out) { + API_IMPL_BEGIN + auto session = reinterpret_cast<const ::onnxruntime::InferenceSession*>(sess); + auto allocator_ptr = session->GetAllocator(mem_info); + if (!allocator_ptr) { + return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "No requested allocator available"); + } + *out = new OrtAllocatorForDevice(std::move(allocator_ptr)); + return nullptr; + API_IMPL_END } + +ORT_API(void, OrtApis::ReleaseDeviceAllocator, _Frees_ptr_opt_ OrtAllocator* allocator) { + delete reinterpret_cast<OrtAllocatorForDevice*>(allocator); } diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index d8f121288a967..1209b78971b9a 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -1340,6 +1340,10 @@ std::string InferenceSession::EndProfiling() { return std::string(); } +AllocatorPtr InferenceSession::GetAllocator(const OrtMemoryInfo* mem_info) const { + return execution_providers_.GetAllocator(*mem_info); +} + // assumes model has already been loaded before common::Status InferenceSession::DoPostLoadProcessing(onnxruntime::Model& model) { // TODO add other post load processing here
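For orientation before the next file diff, here is a minimal usage sketch of the pair this patch introduces, written against the C API table as published below. This is a hedged sketch, not code from the patch: `session` and `mem_info` are assumed to have been created elsewhere, and the pair is renamed to CreateAllocator/ReleaseAllocator later in this series.

```cpp
#include "onnxruntime_c_api.h"

// Sketch only: exercises CreateDeviceAllocator/ReleaseDeviceAllocator as
// declared in this patch; `session` and `mem_info` are assumed pre-existing.
void allocator_demo(const OrtApi* api, const OrtSession* session, const OrtMemoryInfo* mem_info) {
  OrtAllocator* allocator = nullptr;
  OrtStatus* status = api->CreateDeviceAllocator(session, mem_info, &allocator);
  if (status != nullptr) {  // a non-null OrtStatus* signals failure
    api->ReleaseStatus(status);
    return;
  }
  // OrtAllocator is a C-style vtable: each call passes the allocator itself as this_
  void* p = allocator->Alloc(allocator, 1024);
  allocator->Free(allocator, p);
  api->ReleaseDeviceAllocator(allocator);  // frees the wrapper; the session keeps its own AllocatorPtr
}
```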
diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h index d587219601cca..a11ef99a23cbd 100644 --- a/onnxruntime/core/session/inference_session.h +++ b/onnxruntime/core/session/inference_session.h @@ -340,6 +340,14 @@ class InferenceSession { */ std::string EndProfiling(); + /** + * Search registered execution providers for an allocator that has characteristics + * specified within mem_info + * @param mem_info is a pointer to OrtMemoryInfo that contains the required specs + * @return a ptr to the allocator or nullptr if not available + */ + AllocatorPtr GetAllocator(const OrtMemoryInfo* mem_info) const; + protected: /** * Load an ONNX model. diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 1f5bf29ae2eb6..2aa7a3342c465 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -1614,6 +1614,9 @@ static constexpr OrtApi ort_api_1_to_4 = { // Version 4 - In development, feel free to add/remove/rearrange here &OrtApis::GetAvailableProviders, &OrtApis::ReleaseAvailableProviders, + + &OrtApis::CreateDeviceAllocator, + &OrtApis::ReleaseDeviceAllocator }; // Assert to do a limited check to ensure Version 1 of OrtApi never changes (will detect an addition or deletion but not if they cancel out each other) diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index 160fe907b79e9..893c716d7f9fe 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -202,4 +202,8 @@ ORT_API_STATUS_IMPL(GetAvailableProviders, _Outptr_ char ***out_ptr, _In_ int *providers_length); ORT_API_STATUS_IMPL(ReleaseAvailableProviders, _In_ char **ptr, _In_ int providers_length); + +ORT_API_STATUS_IMPL(CreateDeviceAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, + _Outptr_ OrtAllocator** out); +ORT_API(void, ReleaseDeviceAllocator, _Frees_ptr_opt_ OrtAllocator* allocator); } // namespace OrtApis From 10833d81a8222d5397a56da1ec063b786413ef1e Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 26 Jun 2020 18:00:43 -0700 Subject: [PATCH 02/39] Add C++ API for CreateAllocator/ReleaseAllocator --- .../core/session/onnxruntime_c_api.h | 5 ++++ .../core/session/onnxruntime_cxx_api.h | 13 +++++++++++ .../core/session/onnxruntime_cxx_inline.h | 23 ++++++++++++++++++- onnxruntime/core/session/device_allocator.cc | 4 ++-- onnxruntime/core/session/onnxruntime_c_api.cc | 4 ++-- onnxruntime/core/session/ort_apis.h | 4 ++-- 6 files changed, 46 insertions(+), 7 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index c94631620a99b..d33ae090a9147 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -836,6 +836,11 @@ struct OrtApi { */ ORT_API2_STATUS(ReleaseAvailableProviders, _In_ char **ptr, _In_ int providers_length); + + ORT_API2_STATUS(CreateAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, + _Outptr_ OrtAllocator** out); + + ORT_CLASS_RELEASE(Allocator); }; /* diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index cd3f76fa8ed9b..16bb60aa51494 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -65,6 +65,7 @@ std::vector<std::string> GetAvailableProviders(); #define
ORT_DEFINE_RELEASE(NAME) \ inline void OrtRelease(Ort##NAME* ptr) { Global::api_.Release##NAME(ptr); } +ORT_DEFINE_RELEASE(Allocator); ORT_DEFINE_RELEASE(MemoryInfo); ORT_DEFINE_RELEASE(CustomOpDomain); ORT_DEFINE_RELEASE(Env); @@ -309,6 +310,9 @@ struct AllocatorWithDefaultOptions { OrtAllocator* p_{}; }; +struct Allocator : public Base { +}; + struct MemoryInfo : Base { static MemoryInfo CreateCpu(OrtAllocatorType type, OrtMemType mem_type1); @@ -318,6 +322,15 @@ struct MemoryInfo : Base { explicit MemoryInfo(OrtMemoryInfo* p) : Base{p} {} }; +struct Allocator : public Base { + Allocator(const Session& session, const MemoryInfo&); + + void* Alloc(size_t size) const; + void Free(void* p) const; + const OrtMemoryInfo* GetInfo() const; +}; + + // // Custom OPs (only needed to implement custom OPs) // diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index d0631746fb6cd..a35b506842770 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -78,6 +78,27 @@ inline MemoryInfo::MemoryInfo(const char* name, OrtAllocatorType type, int id, O ThrowOnError(Global::api_.CreateMemoryInfo(name, type, id, mem_type, &p_)); } +inline Allocator::Allocator(const Session& sess, const MemoryInfo& mem_info) { + ThrowOnError(Global::api_.CreateAllocator(sess.operator const OrtSession*(), + mem_info.operator const OrtMemoryInfo*(), &p_)); +} + +inline void* Allocator::Alloc(size_t size) const { + void* out = nullptr; + ThrowOnError(Global::api_.AllocatorAlloc(p_, size, &out)); + return out; +} + +inline void Allocator::Free(void* p) const { + ThrowOnError(Global::api_.AllocatorFree(p_, p)); +} + +inline const OrtMemoryInfo* Allocator::GetInfo() const { + const OrtMemoryInfo* out = nullptr; + ThrowOnError(Global::api_.AllocatorGetInfo(p_, &out)); + return out; +} + inline Env::Env(OrtLoggingLevel default_warning_level, _In_ const char* logid) { ThrowOnError(Global::api_.CreateEnv(default_warning_level, logid, &p_)); } @@ -620,7 +641,7 @@ inline SessionOptions& SessionOptions::DisablePerSessionThreads() { inline std::vector GetAvailableProviders() { int len; - char **providers; + char** providers; const OrtApi& api = GetApi(); ThrowOnError(api.GetAvailableProviders(&providers, &len)); std::vector available_providers(providers, providers + len); diff --git a/onnxruntime/core/session/device_allocator.cc b/onnxruntime/core/session/device_allocator.cc index 2140cb589f7e2..b45cd02c84782 100644 --- a/onnxruntime/core/session/device_allocator.cc +++ b/onnxruntime/core/session/device_allocator.cc @@ -45,7 +45,7 @@ struct OrtAllocatorForDevice : public OrtAllocator { return OrtApis::CreateStatus(ORT_RUNTIME_EXCEPTION, ex.what()); \ } -ORT_API_STATUS_IMPL(OrtApis::CreateDeviceAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, _Outptr_ OrtAllocator** out) { +ORT_API_STATUS_IMPL(OrtApis::CreateAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, _Outptr_ OrtAllocator** out) { API_IMPL_BEGIN auto session = reinterpret_cast(sess); auto allocator_ptr = session->GetAllocator(mem_info); @@ -57,6 +57,6 @@ ORT_API_STATUS_IMPL(OrtApis::CreateDeviceAllocator, const OrtSession* sess, cons API_IMPL_END } -ORT_API(void, OrtApis::ReleaseDeviceAllocator, _Frees_ptr_opt_ OrtAllocator* allocator) { +ORT_API(void, OrtApis::ReleaseAllocator, _Frees_ptr_opt_ OrtAllocator* allocator) { delete reinterpret_cast(allocator); } diff --git 
a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 2aa7a3342c465..0fd4da9247c3f 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -1615,8 +1615,8 @@ static constexpr OrtApi ort_api_1_to_4 = { &OrtApis::GetAvailableProviders, &OrtApis::ReleaseAvailableProviders, - &OrtApis::CreateDeviceAllocator, - &OrtApis::ReleaseDeviceAllocator + &OrtApis::CreateAllocator, + &OrtApis::ReleaseAllocator }; // Assert to do a limited check to ensure Version 1 of OrtApi never changes (will detect an addition or deletion but not if they cancel out each other) diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index 893c716d7f9fe..dd763da0078cb 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -203,7 +203,7 @@ ORT_API_STATUS_IMPL(GetAvailableProviders, _Outptr_ char ***out_ptr, ORT_API_STATUS_IMPL(ReleaseAvailableProviders, _In_ char **ptr, _In_ int providers_length); -ORT_API_STATUS_IMPL(CreateDeviceAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, +ORT_API_STATUS_IMPL(CreateAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, _Outptr_ OrtAllocator** out); -ORT_API(void, ReleaseDeviceAllocator, _Frees_ptr_opt_ OrtAllocator* allocator); +ORT_API(void, ReleaseAllocator, _Frees_ptr_opt_ OrtAllocator* allocator); } // namespace OrtApis From 83b7742c0c29cd732c3acde324590be3c6ed2196 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 29 Jun 2020 16:53:09 -0700 Subject: [PATCH 03/39] Move Allocator API up the structure. Add shared library tests. --- .../core/session/onnxruntime_c_api.h | 18 +++++--- .../core/session/onnxruntime_cxx_api.h | 3 -- onnxruntime/core/session/onnxruntime_c_api.cc | 11 ++++--- onnxruntime/core/session/ort_apis.h | 8 ++--- onnxruntime/test/shared_lib/test_inference.cc | 33 +++++++++++++++++++ 5 files changed, 57 insertions(+), 16 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index d33ae090a9147..8aafcb2f3a01a 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -818,6 +818,19 @@ struct OrtApi { _Inout_ OrtSessionOptions* options, _In_ const char* dim_name, _In_ int64_t dim_value); + /** + * \param sess valid OrtSession instance + * \param mem_info - valid OrtMemoryInfo instance + * \param out - a ptr to a new instance of OrtAllocator according to the spec within mem_info + * if successful + * \return OrtStatus or nullptr if successful + */ + ORT_API2_STATUS(CreateAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, + _Outptr_ OrtAllocator** out); + + // Release instance of OrtAllocator obtained from CreateAllocator API + ORT_CLASS_RELEASE(Allocator); + /** * \param out_ptr will hold a pointer to the array of char * * representing available providers.
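As a companion to the doc comment above, here is how the shared-library tests added later in this patch drive the API through the C++ wrapper from patch 02. A condensed, hedged sketch that assumes an already-constructed `Ort::Session session`:

```cpp
// Sketch mirroring the test flow below; Ort::Allocator owns the OrtAllocator*
// and invokes ReleaseAllocator from its destructor via ORT_DEFINE_RELEASE.
Ort::MemoryInfo info_cpu = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemTypeDefault);
Ort::Allocator cpu_allocator(session, info_cpu);
void* p = cpu_allocator.Alloc(1024);  // forwards to the session's device allocator
cpu_allocator.Free(p);
```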
@@ -836,11 +849,6 @@ struct OrtApi { */ ORT_API2_STATUS(ReleaseAvailableProviders, _In_ char **ptr, _In_ int providers_length); - - ORT_API2_STATUS(CreateAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, - _Outptr_ OrtAllocator** out); - - ORT_CLASS_RELEASE(Allocator); }; /* diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 16bb60aa51494..54466a3e1adba 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -310,9 +310,6 @@ struct AllocatorWithDefaultOptions { OrtAllocator* p_{}; }; -struct Allocator : public Base { -}; - struct MemoryInfo : Base { static MemoryInfo CreateCpu(OrtAllocatorType type, OrtMemType mem_type1); diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 0fd4da9247c3f..71773a35ccdaf 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -1611,12 +1611,15 @@ static constexpr OrtApi ort_api_1_to_4 = { &OrtApis::AddFreeDimensionOverrideByName, // End of Version 3 - DO NOT MODIFY ABOVE (see above text for more information) - // Version 4 - In development, feel free to add/remove/rearrange here - &OrtApis::GetAvailableProviders, - &OrtApis::ReleaseAvailableProviders, + // Version 4 - In development + // Allocator and Bidning APIs are exposed via C# API , do not move &OrtApis::CreateAllocator, - &OrtApis::ReleaseAllocator + &OrtApis::ReleaseAllocator, + + // feel free to add/remove/rearrange here + &OrtApis::GetAvailableProviders, + &OrtApis::ReleaseAvailableProviders }; // Assert to do a limited check to ensure Version 1 of OrtApi never changes (will detect an addition or deletion but not if they cancel out each other) diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index dd763da0078cb..e77a6f81a1d50 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -198,12 +198,12 @@ ORT_API_STATUS_IMPL(ModelMetadataGetCustomMetadataMapKeys, _In_ const OrtModelMe ORT_API_STATUS_IMPL(AddFreeDimensionOverrideByName, _Inout_ OrtSessionOptions* options, _In_ const char* dim_name, _In_ int64_t dim_value); +ORT_API_STATUS_IMPL(CreateAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, + _Outptr_ OrtAllocator** out); +ORT_API(void, ReleaseAllocator, _Frees_ptr_opt_ OrtAllocator* allocator); + ORT_API_STATUS_IMPL(GetAvailableProviders, _Outptr_ char ***out_ptr, _In_ int *providers_length); ORT_API_STATUS_IMPL(ReleaseAvailableProviders, _In_ char **ptr, _In_ int providers_length); - -ORT_API_STATUS_IMPL(CreateAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, - _Outptr_ OrtAllocator** out); -ORT_API(void, ReleaseAllocator, _Frees_ptr_opt_ OrtAllocator* allocator); } // namespace OrtApis diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 2a4a23391e3cb..ff4e820826223 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -390,6 +390,39 @@ TEST(CApiTest, create_session_without_session_option) { } #endif +TEST(CApiTest, GetAllocatorCPU) { + Ort::SessionOptions session_options; + OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1); + Ort::Session session(*ort_env, NAMED_AND_ANON_DIM_PARAM_URI, session_options); + Ort::MemoryInfo info_cpu = 
Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemTypeDefault); + Ort::Allocator cpu_allocator(session, info_cpu); + + int com_result = 0; + Ort::ThrowOnError(Ort::GetApi().CompareMemoryInfo(info_cpu, cpu_allocator.GetInfo(), &com_result)); + ASSERT_EQ(com_result, 0); + void* p = cpu_allocator.Alloc(1024); + ASSERT_NE(p, nullptr); + cpu_allocator.Free(p); +} + +#ifdef USE_CUDA +TEST(CApiTest, GetAllocatorCUDA) { + Ort::SessionOptions session_options; + OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0); + Ort::Session session(*ort_env, NAMED_AND_ANON_DIM_PARAM_URI, session_options); + + Ort::MemoryInfo info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault); + Ort::Allocator cuda_allocator(session, info_cuda); + + int com_result = 0; + Ort::ThrowOnError(Ort::GetApi().CompareMemoryInfo(info_cuda, cuda_allocator.GetInfo(), &com_result)); + ASSERT_EQ(com_result, 0); + void* p = cuda_allocator.Alloc(1024); + ASSERT_NE(p, nullptr); + cuda_allocator.Free(p); +} +#endif + TEST(CApiTest, create_tensor) { const char* s[] = {"abc", "kmp"}; int64_t expected_len = 2; From 39d33f93f557e38d2880f6b029632ff2b8444d58 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 30 Jun 2020 18:27:40 -0700 Subject: [PATCH 04/39] Add IoBinding. --- .../core/session/onnxruntime_c_api.h | 35 ++++++++++- .../core/session/onnxruntime_cxx_api.h | 8 +++ .../core/session/onnxruntime_cxx_inline.h | 20 ++++++ onnxruntime/core/session/inference_session.cc | 4 +- onnxruntime/core/session/onnxruntime_c_api.cc | 61 ++++++++++++++++++- onnxruntime/core/session/ort_apis.h | 9 +++ 6 files changed, 133 insertions(+), 4 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 8aafcb2f3a01a..1f4e84d62a0b3 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -141,6 +141,7 @@ typedef enum OrtErrorCode { ORT_RUNTIME_CLASS(Env); ORT_RUNTIME_CLASS(Status); // nullptr for Status* indicates success ORT_RUNTIME_CLASS(MemoryInfo); +ORT_RUNTIME_CLASS(IoBinding); ORT_RUNTIME_CLASS(Session); //Don't call OrtReleaseSession from Dllmain (because session owns a thread pool) ORT_RUNTIME_CLASS(Value); ORT_RUNTIME_CLASS(RunOptions); @@ -825,12 +826,44 @@ struct OrtApi { * if successful * \return OrtStatus or nullptr if successful */ - ORT_API2_STATUS(CreateAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, + ORT_API2_STATUS(CreateAllocator, _In_ const OrtSession* sess, _In_ const OrtMemoryInfo* mem_info, _Outptr_ OrtAllocator** out); // Release instance of OrtAllocator obtained from CreateAllocator API ORT_CLASS_RELEASE(Allocator); + // Creates an IoBinding instance that allows one to bind pre-allocated OrtValues + // to input names. Thus if you want to use a raw on-device buffer as input or output + // you can avoid an extra copy during runtime. + ORT_API2_STATUS(CreateIoBinding, _Inout_ OrtSession* sess, _Outptr_ OrtIoBinding** out); + + // Release instance of OrtIoBinding obtained from CreateIoBinding API + ORT_CLASS_RELEASE(IoBinding); + + /** + * The function will bind the OrtValue to a specified input name. + * The OrtValue must be a Tensor. ORT would use that value in place of input for the specified name. + * \param binding_ptr - an instance of OrtIoBinding created by CreateIoBinding() + * \param name - name for the model input + * \param val_ptr - OrtValue of Tensor type.
+ * \return OrtStatus instance on error which the caller is responsible to free or nullptr on success + */ + ORT_API2_STATUS(BindInput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr); + /** + * The function will bind the OrtValue to the specified output name. + * The OrtValue must be a Tensor. ORT would use that value in place of output for the specified name. + * \param binding_ptr - an instance of OrtIoBinding created by CreateIoBinding() + * \param name - name for the model output + * \param val_ptr - OrtValue of Tensor type. + * \return OrtStatus instance on error which the caller is responsible to free or nullptr on success + */ + ORT_API2_STATUS(BindOutput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr); + + /** Clears any previously specified bindings for inputs/outputs + */ + void(ORT_API_CALL* ClearBoundInputs)(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION ORT_ALL_ARGS_NONNULL; + void(ORT_API_CALL* ClearBoundOutputs)(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION ORT_ALL_ARGS_NONNULL; + /** * \param out_ptr will hold a pointer to the array of char * * representing available providers. diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 54466a3e1adba..61bbcd11fcf06 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -77,6 +77,7 @@ ORT_DEFINE_RELEASE(TypeInfo); ORT_DEFINE_RELEASE(Value); ORT_DEFINE_RELEASE(ModelMetadata); ORT_DEFINE_RELEASE(ThreadingOptions); +ORT_DEFINE_RELEASE(IoBinding); // This is used internally by the C++ API. This is the common base class used by the wrapper objects. template @@ -327,6 +328,13 @@ struct Allocator : public Base { const OrtMemoryInfo* GetInfo() const; }; +struct IoBinding : public Base { + explicit IoBinding(Session& session); + void BindInput(const char* name, const Value&); + void BindOutput(const char* name, const Value&); + void ClearBoundInputs(); + void ClearBoundOutputs(); +}; // // Custom OPs (only needed to implement custom OPs) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index a35b506842770..5eb2449646e85 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -99,6 +99,26 @@ inline const OrtMemoryInfo* Allocator::GetInfo() const { return out; } +inline IoBinding::IoBinding(Session& session) { + ThrowOnError(Global::api_.CreateIoBinding(session, &p_)); +} + +inline void IoBinding::BindInput(const char* name, const Value& value) { + ThrowOnError(Global::api_.BindInput(p_, name, value)); +} + +inline void IoBinding::BindOutput(const char* name, const Value& value) { + ThrowOnError(Global::api_.BindOutput(p_, name, value)); +} + +inline void IoBinding::ClearBoundInputs() { + Global::api_.ClearBoundInputs(p_); +} + +inline void IoBinding::ClearBoundOutputs() { + Global::api_.ClearBoundOutputs(p_); +} + inline Env::Env(OrtLoggingLevel default_warning_level, _In_ const char* logid) { ThrowOnError(Global::api_.CreateEnv(default_warning_level, logid, &p_)); } diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 0b8eb465b96c6..8b7a8d84a7d0a 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -1378,8 +1378,8 @@ 
std::string InferenceSession::EndProfiling() { } AllocatorPtr InferenceSession::GetAllocator(const OrtMemoryInfo* mem_info) const { - return execution_providers_.GetAllocator(*mem_info); -} + return session_state_->GetAllocator(mem_info->device); + } // assumes model has already been loaded before common::Status InferenceSession::DoPostLoadProcessing(onnxruntime::Model& model) { diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 71773a35ccdaf..684b521d2fbb3 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -3,6 +3,7 @@ #include "core/session/onnxruntime_c_api.h" #include "core/session/allocator_impl.h" +#include "core/session/IOBinding.h" #include "core/framework/error_code_helper.h" #include "core/framework/execution_provider.h" #include "core/framework/utils.h" @@ -522,6 +523,58 @@ ORT_API_STATUS_IMPL(OrtApis::Run, _Inout_ OrtSession* sess, _In_opt_ const OrtRu API_IMPL_END } +struct OrtIoBinding { + std::unique_ptr<::onnxruntime::IOBinding> binding_; + explicit OrtIoBinding(std::unique_ptr<::onnxruntime::IOBinding>&& binding) : binding_(std::move(binding)) {} + OrtIoBinding(const OrtIoBinding&) = delete; + OrtIoBinding& operator=(const OrtIoBinding&) = delete; +}; + +ORT_API_STATUS_IMPL(OrtApis::CreateIoBinding, _Inout_ OrtSession* sess, _Outptr_ OrtIoBinding** out) { + API_IMPL_BEGIN + auto session = reinterpret_cast<::onnxruntime::InferenceSession*>(sess); + std::unique_ptr<::onnxruntime::IOBinding> binding; + auto status = session->NewIOBinding(&binding); + if (!status.IsOK()) { + return ToOrtStatus(status); + } + *out = new OrtIoBinding(std::move(binding)); + return nullptr; + API_IMPL_END +} + +ORT_API(void, OrtApis::ReleaseIoBinding, _Frees_ptr_opt_ OrtIoBinding* binding_ptr) { + delete binding_ptr; +} + +ORT_API_STATUS_IMPL(OrtApis::BindInput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr) { + API_IMPL_BEGIN + auto st = binding_ptr->binding_->BindInput(name, *val_ptr); + if (!st.IsOK()) { + return ToOrtStatus(st); + } + return nullptr; + API_IMPL_END +} + +ORT_API_STATUS_IMPL(OrtApis::BindOutput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr) { + API_IMPL_BEGIN + auto st = binding_ptr->binding_->BindOutput(name, *val_ptr); + if (!st.IsOK()) { + return ToOrtStatus(st); + } + return nullptr; + API_IMPL_END +} + +void OrtApis::ClearBoundInputs(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION { + binding_ptr->binding_->ClearInputs(); +} + +void OrtApis::ClearBoundOutputs(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION { + binding_ptr->binding_->ClearOutputs(); +} + ORT_API_STATUS_IMPL(OrtApis::IsTensor, _In_ const OrtValue* value, _Out_ int* out) { auto v = reinterpret_cast(value); *out = v->IsTensor() ? 
1 : 0; @@ -1613,9 +1666,15 @@ static constexpr OrtApi ort_api_1_to_4 = { // Version 4 - In development - // Allocator and Bidning APIs are exposed via C# API , do not move + // Allocator and Binding APIs are exposed via C# API , do not move &OrtApis::CreateAllocator, &OrtApis::ReleaseAllocator, + &OrtApis::CreateIoBinding, + &OrtApis::ReleaseIoBinding, + &OrtApis::BindInput, + &OrtApis::BindOutput, + &OrtApis::ClearBoundInputs, + &OrtApis::ClearBoundOutputs, // feel free to add/remove/rearrange here &OrtApis::GetAvailableProviders, &OrtApis::ReleaseAvailableProviders diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index e77a6f81a1d50..7aa3d0b4497fb 100644 --- a/onnxruntime/core/session/ort_apis.h +++ b/onnxruntime/core/session/ort_apis.h @@ -202,6 +202,15 @@ ORT_API_STATUS_IMPL(CreateAllocator, const OrtSession* sess, const OrtMemoryInfo _Outptr_ OrtAllocator** out); ORT_API(void, ReleaseAllocator, _Frees_ptr_opt_ OrtAllocator* allocator); +ORT_API_STATUS_IMPL(CreateIoBinding, _Inout_ OrtSession* sess, _Outptr_ OrtIoBinding** out); +ORT_API(void, ReleaseIoBinding, _Frees_ptr_opt_ OrtIoBinding* binding_ptr); + +ORT_API_STATUS_IMPL(BindInput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr); +ORT_API_STATUS_IMPL(BindOutput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr); + +ORT_API(void, ClearBoundInputs, _Inout_ OrtIoBinding* binding_ptr); +ORT_API(void, ClearBoundOutputs, _Inout_ OrtIoBinding* binding_ptr); + ORT_API_STATUS_IMPL(GetAvailableProviders, _Outptr_ char ***out_ptr, _In_ int *providers_length); ORT_API_STATUS_IMPL(ReleaseAvailableProviders, _In_ char **ptr, From e4e39c9461dad1b5abe24c633ef1a5c1f4200583 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 1 Jul 2020 13:48:14 -0700 Subject: [PATCH 05/39] Add Session::Run() with binding and tests. Run with APF. --- .../core/session/onnxruntime_c_api.h | 2 ++ .../core/session/onnxruntime_cxx_api.h | 2 ++ .../core/session/onnxruntime_cxx_inline.h | 4 +++ onnxruntime/core/session/inference_session.cc | 2 +- onnxruntime/core/session/onnxruntime_c_api.cc | 13 ++++++++ onnxruntime/core/session/ort_apis.h | 2 ++ onnxruntime/test/shared_lib/test_inference.cc | 33 +++++++++++++++++++ 7 files changed, 55 insertions(+), 3 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 1f4e84d62a0b3..e354ddc6c38d1 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -832,6 +832,8 @@ struct OrtApi { // Release instance of OrtAllocator obtained from CreateAllocator API ORT_CLASS_RELEASE(Allocator); + ORT_API2_STATUS(RunWithBinding, _Inout_ OrtSession* sess, _In_opt_ const OrtRunOptions* run_options, _In_ const OrtIoBinding* binding_ptr); + // Creates an IoBinding instance that allows one to bind pre-allocated OrtValues // to input names. Thus if you want to use a raw on-device buffer as input or output // you can avoid an extra copy during runtime.
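Taken together with patch 04, the intended C-API call sequence now looks roughly as follows. This is a hedged sketch with error handling elided: `api`, `session`, `run_options`, and the OrtValue tensors `x_value`/`y_value` (wrapping caller-owned buffers for an input named "X" and an output named "Y") are assumptions, not part of the patch.

```cpp
// Sketch of the binding flow; the names "X"/"Y" are illustrative only.
OrtIoBinding* binding = nullptr;
api->CreateIoBinding(session, &binding);
api->BindInput(binding, "X", x_value);   // the OrtValue is copied internally; caller may release it
api->BindOutput(binding, "Y", y_value);
api->RunWithBinding(session, run_options, binding);  // results land in y_value's buffer
api->ClearBoundInputs(binding);
api->ClearBoundOutputs(binding);
api->ReleaseIoBinding(binding);
```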
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 61bbcd11fcf06..b3b485e1ae60f 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -222,6 +222,8 @@ struct Session : Base { void Run(const RunOptions& run_options, const char* const* input_names, const Value* input_values, size_t input_count, const char* const* output_names, Value* output_values, size_t output_count); + void Run(const RunOptions& run_options, const struct IoBinding&); + size_t GetInputCount() const; size_t GetOutputCount() const; size_t GetOverridableInitializerCount() const; diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index 5eb2449646e85..425b19cc812bb 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -289,6 +289,10 @@ inline void Session::Run(const RunOptions& run_options, const char* const* input ThrowOnError(Global::api_.Run(p_, run_options, input_names, ort_input_values, input_count, output_names, output_count, ort_output_values)); } +inline void Session::Run(const RunOptions& run_options, const IoBinding& io_binding) { + ThrowOnError(Global::api_.RunWithBinding(p_, run_options, io_binding)); +} + inline size_t Session::GetInputCount() const { size_t out; ThrowOnError(Global::api_.SessionGetInputCount(p_, &out)); diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 8b7a8d84a7d0a..7a83176ef1b54 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -1378,7 +1378,7 @@ std::string InferenceSession::EndProfiling() { } AllocatorPtr InferenceSession::GetAllocator(const OrtMemoryInfo* mem_info) const { - return session_state_->GetAllocator(mem_info->device); + return session_state_->GetAllocator(*mem_info); } // assumes model has already been loaded before diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 684b521d2fbb3..3eb732348769c 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -530,6 +530,18 @@ struct OrtIoBinding { OrtIoBinding& operator=(const OrtIoBinding&) = delete; }; +ORT_API_STATUS_IMPL(OrtApis::RunWithBinding, _Inout_ OrtSession* sess, _In_opt_ const OrtRunOptions* run_options, + const OrtIoBinding* binding_ptr) { + API_IMPL_BEGIN + auto session = reinterpret_cast<::onnxruntime::InferenceSession*>(sess); + auto status = session->Run(*run_options, *binding_ptr->binding_); + if (!status.IsOK()) { + return ToOrtStatus(status); + } + return nullptr; + API_IMPL_END +} + ORT_API_STATUS_IMPL(OrtApis::CreateIoBinding, _Inout_ OrtSession* sess, _Outptr_ OrtIoBinding** out) { API_IMPL_BEGIN auto session = reinterpret_cast<::onnxruntime::InferenceSession*>(sess); @@ -1669,6 +1681,7 @@ static constexpr OrtApi ort_api_1_to_4 = { // Allocator and Binding APIs are exposed via C# API , do not move &OrtApis::CreateAllocator, &OrtApis::ReleaseAllocator, + &OrtApis::RunWithBinding, &OrtApis::CreateIoBinding, &OrtApis::ReleaseIoBinding, &OrtApis::BindInput, diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h index 7aa3d0b4497fb..7f29bd99d8a2e 100644 --- a/onnxruntime/core/session/ort_apis.h +++ 
b/onnxruntime/core/session/ort_apis.h @@ -202,6 +202,8 @@ ORT_API_STATUS_IMPL(CreateAllocator, const OrtSession* sess, const OrtMemoryInfo _Outptr_ OrtAllocator** out); ORT_API(void, ReleaseAllocator, _Frees_ptr_opt_ OrtAllocator* allocator); +ORT_API_STATUS_IMPL(RunWithBinding, _Inout_ OrtSession* sess, _In_opt_ const OrtRunOptions* run_options, _In_ const OrtIoBinding* binding_ptr); + ORT_API_STATUS_IMPL(CreateIoBinding, _Inout_ OrtSession* sess, _Outptr_ OrtIoBinding** out); ORT_API(void, ReleaseIoBinding, _Frees_ptr_opt_ OrtIoBinding* binding_ptr); diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index ff4e820826223..eb2e874ff900c 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -390,7 +390,7 @@ TEST(CApiTest, create_session_without_session_option) { } #endif -TEST(CApiTest, GetAllocatorCPU) { +TEST(CApiTest, get_allocator_cpu) { Ort::SessionOptions session_options; OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1); Ort::Session session(*ort_env, NAMED_AND_ANON_DIM_PARAM_URI, session_options); @@ -406,7 +406,7 @@ TEST(CApiTest, get_allocator_cpu) { } #ifdef USE_CUDA -TEST(CApiTest, GetAllocatorCUDA) { +TEST(CApiTest, get_allocator_cuda) { Ort::SessionOptions session_options; OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0); Ort::Session session(*ort_env, NAMED_AND_ANON_DIM_PARAM_URI, session_options); @@ -423,6 +423,35 @@ TEST(CApiTest, get_allocator_cuda) { } #endif +TEST(CApiTest, io_binding) { + Ort::SessionOptions session_options; + OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1); + Ort::Session session(*ort_env, MODEL_URI, session_options); + + Ort::MemoryInfo info_cpu = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemTypeDefault); + + const std::array<int64_t, 2> x_shape = {3, 2}; + std::array<float, 6> x_values = {1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f}; + Ort::Value bound_x = Ort::Value::CreateTensor(info_cpu, x_values.data(), x_values.size(), + x_shape.data(), x_shape.size()); + + const std::array<float, 6> expected_y = {1.0f, 4.0f, 9.0f, 16.0f, 25.0f, 36.0f}; + const std::array<int64_t, 2> y_shape = {3, 2}; + std::array<float, 6> y_values; + Ort::Value bound_y = Ort::Value::CreateTensor(info_cpu, y_values.data(), y_values.size(), + y_shape.data(), y_shape.size()); + + Ort::IoBinding binding(session); + binding.BindInput("X", bound_x); + binding.BindOutput("Y", bound_y); + + session.Run(Ort::RunOptions(), binding); + ASSERT_TRUE(std::equal(std::cbegin(y_values), std::cend(y_values), std::cbegin(expected_y))); + binding.ClearBoundInputs(); + binding.ClearBoundOutputs(); +} + TEST(CApiTest, create_tensor) { const char* s[] = {"abc", "kmp"}; int64_t expected_len = 2; From c3bb59adb4c2c55b571a99fe1704f6f772d0d691 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 1 Jul 2020 17:48:40 -0700 Subject: [PATCH 06/39] Expose new allocator and io_binding API to C# NativeMethods.
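The exposure below works because the managed OrtApi struct is a field-for-field mirror of the native function-pointer table: C# fetches the table once via GetApi and resolves each delegate by field position. A hedged native-side sketch of that contract (`session` and `mem_info` are assumed to exist):

```cpp
// The C# struct layout must match this table's declaration order exactly;
// reordering native entries without updating NativeMethods.cs breaks every
// delegate resolved after the change.
const OrtApiBase* api_base = OrtGetApiBase();
const OrtApi* api = api_base->GetApi(ORT_API_VERSION);  // nullptr if the version is unsupported
OrtAllocator* allocator = nullptr;
OrtStatus* st = api->CreateAllocator(session, mem_info, &allocator);
if (st == nullptr) {
  api->ReleaseAllocator(allocator);
} else {
  api->ReleaseStatus(st);
}
```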
--- .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 140 ++++++++++++++++-- 1 file changed, 131 insertions(+), 9 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index c099f8ce6c730..92ab5bc864bd2 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -138,6 +138,33 @@ public struct OrtApi public IntPtr ReleaseMapTypeInfo; public IntPtr ReleaseSequenceTypeInfo; public IntPtr SessionEndProfiling; + public IntPtr SessionGetModelMetadata; + public IntPtr ModelMetadataGetProducerName; + public IntPtr ModelMetadataGetGraphName; + public IntPtr ModelMetadataGetDomain; + public IntPtr ModelMetadataGetDescription; + public IntPtr ModelMetadataLookupCustomMetadataMap; + public IntPtr ModelMetadataGetVersion; + public IntPtr ReleaseModelMetadata; + // End of Version 2 + + public IntPtr CreateEnvWithGlobalThreadPools; + public IntPtr DisablePerSessionThreads; + public IntPtr CreateThreadingOptions; + public IntPtr ReleaseThreadingOptions; + public IntPtr ModelMetadataGetCustomMetadataMapKeys; + public IntPtr AddFreeDimensionOverrideByName; + // End of Version 3 - DO NOT MODIFY ABOVE (see above text for more information) + + public IntPtr CreateAllocator; + public IntPtr ReleaseAllocator; + public IntPtr RunWithBinding; + public IntPtr CreateIoBinding; + public IntPtr ReleaseIoBinding; + public IntPtr BindInput; + public IntPtr BindOutput; + public IntPtr ClearBoundInputs; + public IntPtr ClearBoundOutputs; } internal static class NativeMethods @@ -154,7 +181,7 @@ static NativeMethods() DOrtGetApi OrtGetApi = (DOrtGetApi)Marshal.GetDelegateForFunctionPointer(OrtGetApiBase().GetApi, typeof(DOrtGetApi)); // TODO: Make this save the pointer, and not copy the whole structure across - api_ = (OrtApi)OrtGetApi(1 /*ORT_API_VERSION*/); + api_ = (OrtApi)OrtGetApi(4 /*ORT_API_VERSION*/); OrtCreateEnv = (DOrtCreateEnv)Marshal.GetDelegateForFunctionPointer(api_.CreateEnv, typeof(DOrtCreateEnv)); OrtReleaseEnv = (DOrtReleaseEnv)Marshal.GetDelegateForFunctionPointer(api_.ReleaseEnv, typeof(DOrtReleaseEnv)); @@ -213,10 +240,20 @@ static NativeMethods() OrtCreateCpuMemoryInfo = (DOrtCreateCpuMemoryInfo)Marshal.GetDelegateForFunctionPointer(api_.CreateCpuMemoryInfo, typeof(DOrtCreateCpuMemoryInfo)); OrtReleaseMemoryInfo = (DOrtReleaseMemoryInfo)Marshal.GetDelegateForFunctionPointer(api_.ReleaseMemoryInfo, typeof(DOrtReleaseMemoryInfo)); OrtGetAllocatorWithDefaultOptions = (DOrtGetAllocatorWithDefaultOptions)Marshal.GetDelegateForFunctionPointer(api_.GetAllocatorWithDefaultOptions, typeof(DOrtGetAllocatorWithDefaultOptions)); + OrtCreateAllocator = (DOrtCreateAllocator)Marshal.GetDelegateForFunctionPointer(api_.CreateAllocator, typeof(DOrtCreateAllocator)); + OrtReleaseAllocator = (DOrtReleaseAllocator)Marshal.GetDelegateForFunctionPointer(api_.ReleaseAllocator, typeof(DOrtReleaseAllocator)); + OrtAllocatorAlloc = (DOrtAllocatorAlloc)Marshal.GetDelegateForFunctionPointer(api_.AllocatorAlloc, typeof(DOrtAllocatorAlloc)); OrtAllocatorFree = (DOrtAllocatorFree)Marshal.GetDelegateForFunctionPointer(api_.AllocatorFree, typeof(DOrtAllocatorFree)); OrtAllocatorGetInfo = (DOrtAllocatorGetInfo)Marshal.GetDelegateForFunctionPointer(api_.AllocatorGetInfo, typeof(DOrtAllocatorGetInfo)); OrtAddFreeDimensionOverride = (DOrtAddFreeDimensionOverride)Marshal.GetDelegateForFunctionPointer(api_.AddFreeDimensionOverride, typeof(DOrtAddFreeDimensionOverride)); + OrtCreateIoBinding = 
(DOrtCreateIoBinding)Marshal.GetDelegateForFunctionPointer(api_.CreateIoBinding, typeof(DOrtCreateIoBinding)); + OrtReleaseIoBinding = (DOrtReleaseIoBinding)Marshal.GetDelegateForFunctionPointer(api_.ReleaseIoBinding, typeof(DOrtReleaseIoBinding)); + OrtBindInput = (DOrtBindInput)Marshal.GetDelegateForFunctionPointer(api_.BindInput, typeof(DOrtBindInput)); + OrtBindOutput = (DOrtBindOutput)Marshal.GetDelegateForFunctionPointer(api_.BindOutput, typeof(DOrtBindOutput)); + OrtClearBoundInputs = (DOrtClearBoundInputs)Marshal.GetDelegateForFunctionPointer(api_.ClearBoundInputs, typeof(DOrtClearBoundInputs)); + OrtClearBoundOutputs = (DOrtClearBoundOutputs)Marshal.GetDelegateForFunctionPointer(api_.ClearBoundOutputs, typeof(DOrtClearBoundOutputs)); + OrtGetValue = (DOrtGetValue)Marshal.GetDelegateForFunctionPointer(api_.GetValue, typeof(DOrtGetValue)); OrtGetValueType = (DOrtGetValueType)Marshal.GetDelegateForFunctionPointer(api_.GetValueType, typeof(DOrtGetValueType)); OrtGetOnnxTypeFromTypeInfo = (DOrtGetOnnxTypeFromTypeInfo)Marshal.GetDelegateForFunctionPointer(api_.GetOnnxTypeFromTypeInfo, typeof(DOrtGetOnnxTypeFromTypeInfo)); @@ -529,7 +566,6 @@ public enum MemoryType ); public static DOrtCreateMemoryInfo OrtCreateMemoryInfo; - //ORT_API_STATUS(OrtCreateCpuMemoryInfo, enum OrtAllocatorType type, enum OrtMemType mem_type1, _Out_ OrtMemoryInfo** out) public delegate IntPtr /* (OrtStatus*)*/ DOrtCreateCpuMemoryInfo( AllocatorType allocatorType, MemoryType memoryType, @@ -543,19 +579,105 @@ public enum MemoryType public delegate IntPtr /*(OrtStatus*)*/DOrtGetAllocatorWithDefaultOptions(out IntPtr /*(OrtAllocator**)*/ allocator); public static DOrtGetAllocatorWithDefaultOptions OrtGetAllocatorWithDefaultOptions; + public delegate IntPtr /*(OrtStatus*)*/DOrtAllocatorGetInfo(IntPtr /*(const OrtAllocator*)*/ ptr, out IntPtr /*(const struct OrtMemoryInfo**)*/info); + public static DOrtAllocatorGetInfo OrtAllocatorGetInfo; + /// - /// Release any object allocated by an allocator + /// Create an instance of allocator according to mem_info /// - /// - /// - public delegate IntPtr /*(OrtStatus*)*/DOrtAllocatorFree(IntPtr allocator, IntPtr memory); - public static DOrtAllocatorFree OrtAllocatorFree; + /// Session that this allocator should be used with + /// memory allocator specs + /// out pointer to a new allocator instance + public delegate IntPtr /*(OrtStatus*)*/ DOrtCreateAllocator(IntPtr /*(const OrtSession*)*/ session, IntPtr /*(const OrtMemoryInfo*)*/ info, out IntPtr /*(OrtAllocator**)*/ allocator); + public static DOrtCreateAllocator OrtCreateAllocator; - public delegate IntPtr /*(OrtStatus*)*/DOrtAllocatorGetInfo(IntPtr /*(const OrtAllocator*)*/ ptr, out IntPtr /*(const struct OrtMemoryInfo**)*/info); - public static DOrtAllocatorGetInfo OrtAllocatorGetInfo; + /// + /// Destroy an instance of an allocator created by OrtCreateAllocator + /// + /// instance to be destroyed + public delegate void DOrtReleaseAllocator(IntPtr /*(OrtAllocator*)*/ allocator); + public static DOrtReleaseAllocator OrtReleaseAllocator; + + /// + /// Allocate a chunk of native memory + /// + /// allocator instance + /// bytes to allocate + /// out pointer to the allocated memory. 
Must be freed by OrtAllocatorFree + public delegate IntPtr DOrtAllocatorAlloc(IntPtr /*(OrtAllocator*)*/ allocator, UIntPtr /*size_t*/ size, out IntPtr /*(void**)*/ p); + public static DOrtAllocatorAlloc OrtAllocatorAlloc; + + /// + /// Release native memory allocated by an allocator + /// + /// allocator instance + /// pointer to native memory allocated by the allocator instance + public delegate IntPtr DOrtAllocatorFree(IntPtr /*(OrtAllocator*)*/ allocator, IntPtr /*(void*)*/ p); + public static DOrtAllocatorFree OrtAllocatorFree; #endregion Allocator/MemoryInfo API + #region IoBinding API + /// + /// Create OrtIoBinding instance that is used to bind memory that is allocated + /// either by a 3rd party allocator or an ORT device allocator. Such memory should be wrapped by + /// a native OrtValue of Tensor type. By binding such named values you will direct ORT to read model inputs + /// and write model outputs to the supplied memory. + /// + /// session to create OrtIoBinding instance + /// out a new instance of OrtIoBinding + public delegate IntPtr DOrtCreateIoBinding(IntPtr /*(const OrtSession*)*/ session, out IntPtr /*(OrtIoBinding)*/ io_binding); + public static DOrtCreateIoBinding OrtCreateIoBinding; + + /// + /// Destroy OrtIoBinding instance created by OrtCreateIoBinding + /// + /// instance of OrtIoBinding + public delegate void DOrtReleaseIoBinding(IntPtr /*(OrtIoBinding)*/ io_binding); + public static DOrtReleaseIoBinding OrtReleaseIoBinding; + + /// + /// Bind OrtValue to the model input with the specified name + /// If binding with the specified name already exists, it will be replaced + /// + /// instance of OrtIoBinding + /// model input name (utf-8) + /// OrtValue that is used for input (may wrap arbitrary memory). + /// The param instance is copied internally so this argument may be released. + /// + public delegate IntPtr DOrtBindInput(IntPtr /*(OrtIoBinding)*/ io_binding, IntPtr /*(const char*)*/ name, IntPtr /*const OrtValue**/ ort_value); + public static DOrtBindInput OrtBindInput; + + /// + /// Bind OrtValue to the model output with the specified name + /// If binding with the specified name already exists, it will be replaced + /// + /// instance of OrtIoBinding + /// model output name (utf-8) + /// OrtValue that is used for output (may wrap arbitrary memory). + /// The param instance is copied internally so this argument may be released. + /// + public delegate IntPtr DOrtBindOutput(IntPtr /*(OrtIoBinding)*/ io_binding, IntPtr /*(const char*) */ name, IntPtr /*const OrtValue**/ ort_value); + public static DOrtBindOutput OrtBindOutput; + + /// + /// Clears Input bindings. This is a convenience method. + /// Releasing IoBinding instance would clear all bound inputs. + /// + /// instance of OrtIoBinding + public delegate void DOrtClearBoundInputs(IntPtr /*(OrtIoBinding)*/ io_binding); + public static DOrtClearBoundInputs OrtClearBoundInputs; + + /// + /// Clears Output bindings. This is a convenience method. + /// Releasing IoBinding instance would clear all bound outputs. 
+ /// + /// instance of OrtIoBinding + public delegate void DOrtClearBoundOutputs(IntPtr /*(OrtIoBinding)*/ io_binding); + public static DOrtClearBoundOutputs OrtClearBoundOutputs; + + #endregion IoBinding API + #region Tensor/OnnxValue API public delegate IntPtr /*(OrtStatus*)*/ DOrtGetValue(IntPtr /*(OrtValue*)*/ value, From ef5d5be27217e885b74616ca13b07e027c69e53f Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 2 Jul 2020 14:44:21 -0700 Subject: [PATCH 07/39] Do not use outside pointers for meminfo names as they may be short-lived. --- onnxruntime/core/framework/allocator.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/onnxruntime/core/framework/allocator.cc b/onnxruntime/core/framework/allocator.cc index cebbb1b53c244..20523566b8290 100644 --- a/onnxruntime/core/framework/allocator.cc +++ b/onnxruntime/core/framework/allocator.cc @@ -58,14 +58,14 @@ std::ostream& operator<<(std::ostream& out, const OrtMemoryInfo& info) { return ORT_API_STATUS_IMPL(OrtApis::CreateMemoryInfo, _In_ const char* name1, enum OrtAllocatorType type, int id1, enum OrtMemType mem_type1, _Outptr_ OrtMemoryInfo** out) { if (strcmp(name1, onnxruntime::CPU) == 0) { - *out = new OrtMemoryInfo(name1, type, OrtDevice(), id1, mem_type1); + *out = new OrtMemoryInfo(onnxruntime::CPU, type, OrtDevice(), id1, mem_type1); } else if (strcmp(name1, onnxruntime::CUDA) == 0) { *out = new OrtMemoryInfo( - name1, type, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, static_cast<OrtDevice::DeviceId>(id1)), id1, + onnxruntime::CUDA, type, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, static_cast<OrtDevice::DeviceId>(id1)), id1, mem_type1); } else if (strcmp(name1, onnxruntime::CUDA_PINNED) == 0) { *out = new OrtMemoryInfo( - name1, type, OrtDevice(OrtDevice::CPU, OrtDevice::MemType::CUDA_PINNED, static_cast<OrtDevice::DeviceId>(id1)), + onnxruntime::CUDA_PINNED, type, OrtDevice(OrtDevice::CPU, OrtDevice::MemType::CUDA_PINNED, static_cast<OrtDevice::DeviceId>(id1)), id1, mem_type1); } else { return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "Specified device is not supported."); From 8503904ac6cc8d9d3208c29392bc29097899bdb5 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 2 Jul 2020 17:52:36 -0700 Subject: [PATCH 08/39] Convert MemoryInfo to a public class exposing properties --- .../InferenceSession.cs | 2 - .../NativeMemoryAllocator.cs | 129 ++++++++++++++---- .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 55 +++++--- .../NativeOnnxValueHelper.cs | 8 +- 4 files changed, 145 insertions(+), 49 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs index cd3145a4c3e12..18d7d23ec261f 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs @@ -10,8 +10,6 @@ namespace Microsoft.ML.OnnxRuntime { - - /// /// Represents an Inference Session on an ONNX Model /// diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMemoryAllocator.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMemoryAllocator.cs index a42f51bf2eb8d..c951edf880015 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMemoryAllocator.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMemoryAllocator.cs @@ -2,36 +2,52 @@ // Licensed under the MIT License.
using System; -using System.Collections.Generic; -using System.Text; using System.Runtime.InteropServices; +using System.Text; namespace Microsoft.ML.OnnxRuntime { - internal class NativeMemoryInfo : SafeHandle + /// + /// See documentation for OrtAllocatorType in C API + /// + public enum AllocatorType + { + DeviceAllocator = 0, + ArenaAllocator = 1 + } + + /// + /// See documentation for OrtMemType in C API + /// + public enum MemoryType { - protected static readonly Lazy<NativeMemoryInfo> _defaultCpuAllocInfo = new Lazy<NativeMemoryInfo>(CreateCpuMemoryInfo); + CpuInput = -2, // Any CPU memory used by non-CPU execution provider + CpuOutput = -1, // CPU accessible memory outputted by non-CPU execution provider, i.e. CUDA_PINNED + Cpu = CpuOutput, // temporary CPU accessible memory allocated by non-CPU execution provider, i.e. CUDA_PINNED + Default = 0, // the default allocator for execution provider + } + + /// + /// This class encapsulates, and most of the time owns, the underlying native OrtMemoryInfo instance. + /// The only exception is when it is returned from an allocator; then the allocator owns the actual + /// native instance. + /// + /// Use this class to query and create MemoryAllocator instances so you can pre-allocate memory for model + /// inputs/outputs and use it for binding + /// + public class MemoryInfo : SafeHandle + { + private static readonly Lazy<MemoryInfo> _defaultCpuAllocInfo = new Lazy<MemoryInfo>(CreateCpuMemoryInfo); + private readonly bool _owned; // false if we are exposing OrtMemoryInfo from an allocator which owns it - private static NativeMemoryInfo CreateCpuMemoryInfo() + private static MemoryInfo CreateCpuMemoryInfo() { IntPtr allocInfo = IntPtr.Zero; - try - { - IntPtr status = NativeMethods.OrtCreateCpuMemoryInfo(NativeMethods.AllocatorType.DeviceAllocator, NativeMethods.MemoryType.Cpu, out allocInfo); - NativeApiStatus.VerifySuccess(status); - } - catch (Exception e) - { - if (allocInfo != IntPtr.Zero) - { - Delete(allocInfo); - } - throw e; - } - return new NativeMemoryInfo(allocInfo); + NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateCpuMemoryInfo(AllocatorType.DeviceAllocator, MemoryType.Cpu, out allocInfo)); + return new MemoryInfo(allocInfo, true); } - internal static NativeMemoryInfo DefaultInstance + public static MemoryInfo DefaultInstance { get { @@ -55,26 +71,85 @@ public override bool IsInvalid } } - private NativeMemoryInfo(IntPtr allocInfo) - : base(IntPtr.Zero, true) //set 0 as invalid pointer + /// + /// Wraps an existing native OrtMemoryInfo handle + /// + /// + internal MemoryInfo(IntPtr allocInfo, bool p_owned) + : base(IntPtr.Zero, true) //set 0 as invalid pointer { handle = allocInfo; + _owned = p_owned; + } + + // Predefined utf8 encoded allocator names. Use them to construct an instance of + // MemoryInfo + public static readonly byte[] CPU_allocator = Encoding.UTF8.GetBytes("Cpu" + '\0'); + public static readonly byte[] CUDA_allocator = Encoding.UTF8.GetBytes("Cuda" + '\0'); + public static readonly byte[] CUDA_PINNED_allocator = Encoding.UTF8.GetBytes("CudaPinned" + '\0'); + /// + /// Create an instance of MemoryInfo according to the specification + /// Memory info instances are usually used to get a handle of a native allocator + /// that is present within the current inference session object. That, in turn, depends + /// on what execution providers are available within the binary that you are using and are + /// registered with Add methods. + /// + /// Allocator name. Use one of the predefined names above.
+ /// Allocator type + /// Device id + /// Memory type + public MemoryInfo(byte[] utf8_allocator_name, AllocatorType alloc_type, int device_id, MemoryType mem_type) + : base(IntPtr.Zero, true) //set 0 as invalid pointer + { + var pinned_bytes = GCHandle.Alloc(utf8_allocator_name, GCHandleType.Pinned); + try + { + NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateMemoryInfo(pinned_bytes.AddrOfPinnedObject(), + alloc_type, + device_id, + mem_type, + out handle)); + } + finally + { + pinned_bytes.Free(); + } + _owned = true; } + /// + /// Create an instance of MemoryInfo according to the specification. + /// + /// Allocator name + /// Allocator type + /// Device id + /// Memory type + public MemoryInfo(string allocator_name, AllocatorType alloc_type, int device_id, MemoryType mem_type) + : this(Encoding.UTF8.GetBytes(allocator_name + '\0'), alloc_type, device_id, mem_type) + { + } - private static void Delete(IntPtr nativePtr) + public string Name { - NativeMethods.OrtReleaseMemoryInfo(nativePtr); + get + { + IntPtr utf8_name = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtMemoryInfoGetName(handle, out utf8_name)); + // utf8_name points at native memory owned by OrtMemoryInfo; copy it out and decode + int len = 0; + while (Marshal.ReadByte(utf8_name, len) != 0) ++len; + var buffer = new byte[len]; + Marshal.Copy(utf8_name, buffer, 0, len); + return Encoding.UTF8.GetString(buffer); + } } protected override bool ReleaseHandle() { - Delete(handle); + if (_owned) + { + NativeMethods.OrtReleaseMemoryInfo(handle); + } return true; } } - internal class NativeMemoryAllocator : SafeHandle { protected static readonly Lazy<NativeMemoryAllocator> _defaultInstance = new Lazy<NativeMemoryAllocator>(GetDefaultCpuAllocator); @@ -128,7 +203,7 @@ internal IntPtr Handle } } - protected NativeMemoryAllocator(IntPtr allocator) + private NativeMemoryAllocator(IntPtr allocator) : base(IntPtr.Zero, true) { this.handle = allocator; diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index 92ab5bc864bd2..8c1af79d9d38d 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -2,6 +2,7 @@ // Licensed under the MIT License.
using System; +using System.Linq.Expressions; using System.Runtime.InteropServices; namespace Microsoft.ML.OnnxRuntime @@ -192,6 +193,7 @@ static NativeMethods() OrtCreateSession = (DOrtCreateSession)Marshal.GetDelegateForFunctionPointer(api_.CreateSession, typeof(DOrtCreateSession)); OrtCreateSessionFromArray = (DOrtCreateSessionFromArray)Marshal.GetDelegateForFunctionPointer(api_.CreateSessionFromArray, typeof(DOrtCreateSessionFromArray)); OrtRun = (DOrtRun)Marshal.GetDelegateForFunctionPointer(api_.Run, typeof(DOrtRun)); + OrtRunWithBinding = (DOrtRunWithBinding)Marshal.GetDelegateForFunctionPointer(api_.RunWithBinding, typeof(DOrtRunWithBinding)); OrtSessionGetInputCount = (DOrtSessionGetInputCount)Marshal.GetDelegateForFunctionPointer(api_.SessionGetInputCount, typeof(DOrtSessionGetInputCount)); OrtSessionGetOutputCount = (DOrtSessionGetOutputCount)Marshal.GetDelegateForFunctionPointer(api_.SessionGetOutputCount, typeof(DOrtSessionGetOutputCount)); OrtSessionGetOverridableInitializerCount = (DOrtSessionGetOverridableInitializerCount)Marshal.GetDelegateForFunctionPointer(api_.SessionGetOverridableInitializerCount, typeof(DOrtSessionGetOverridableInitializerCount)); @@ -239,6 +241,10 @@ static NativeMethods() OrtCreateMemoryInfo = (DOrtCreateMemoryInfo)Marshal.GetDelegateForFunctionPointer(api_.CreateMemoryInfo, typeof(DOrtCreateMemoryInfo)); OrtCreateCpuMemoryInfo = (DOrtCreateCpuMemoryInfo)Marshal.GetDelegateForFunctionPointer(api_.CreateCpuMemoryInfo, typeof(DOrtCreateCpuMemoryInfo)); OrtReleaseMemoryInfo = (DOrtReleaseMemoryInfo)Marshal.GetDelegateForFunctionPointer(api_.ReleaseMemoryInfo, typeof(DOrtReleaseMemoryInfo)); + OrtCompareMemoryInfo = (DOrtCompareMemoryInfo)Marshal.GetDelegateForFunctionPointer(api_.CompareMemoryInfo, typeof(DOrtCompareMemoryInfo)); + OrtMemoryInfoGetName = (DOrtMemoryInfoGetName)Marshal.GetDelegateForFunctionPointer(api_.MemoryInfoGetName, typeof(DOrtMemoryInfoGetName)); + OrtMemoryInfoGetId = (DOrtMemoryInfoGetId)Marshal.GetDelegateForFunctionPointer(api_.MemoryInfoGetId, typeof(DOrtMemoryInfoGetId)); + OrtMemoryInfoGetMemType = (DOrtMemoryInfoGetMemType)Marshal.GetDelegateForFunctionPointer(api_.MemoryInfoGetType, typeof(DOrtMemoryInfoGetMemType)); OrtGetAllocatorWithDefaultOptions = (DOrtGetAllocatorWithDefaultOptions)Marshal.GetDelegateForFunctionPointer(api_.GetAllocatorWithDefaultOptions, typeof(DOrtGetAllocatorWithDefaultOptions)); OrtCreateAllocator = (DOrtCreateAllocator)Marshal.GetDelegateForFunctionPointer(api_.CreateAllocator, typeof(DOrtCreateAllocator)); OrtReleaseAllocator = (DOrtReleaseAllocator)Marshal.GetDelegateForFunctionPointer(api_.ReleaseAllocator, typeof(DOrtReleaseAllocator)); @@ -334,6 +340,13 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca ); public static DOrtRun OrtRun; + public delegate IntPtr /*(ONNStatus*)*/ DOrtRunWithBinding( + IntPtr /*(OrtSession*)*/ session, + IntPtr /*(OrtSessionRunOptions*)*/ runOptions, // can not be null + IntPtr /*(const OrtIoBinding*)*/ io_binding + ); + public static DOrtRunWithBinding OrtRunWithBinding; + public delegate IntPtr /*(OrtStatus*)*/ DOrtSessionGetInputCount( IntPtr /*(OrtSession*)*/ session, out UIntPtr count); @@ -540,23 +553,6 @@ IntPtr[] outputValues /* An array of output value pointers. 
Array must be alloca #region Allocator/MemoryInfo API - //TODO: consider exposing them publicly, when allocator API is exposed - public enum AllocatorType - { - DeviceAllocator = 0, - ArenaAllocator = 1 - } - - //TODO: consider exposing them publicly when allocator API is exposed - public enum MemoryType - { - CpuInput = -2, // Any CPU memory used by non-CPU execution provider - CpuOutput = -1, // CPU accessible memory outputted by non-CPU execution provider, i.e. CUDA_PINNED - Cpu = CpuOutput, // temporary CPU accessible memory allocated by non-CPU execution provider, i.e. CUDA_PINNED - Default = 0, // the default allocator for execution provider - } - - public delegate IntPtr /* (OrtStatus*)*/ DOrtCreateMemoryInfo( IntPtr /*(const char*) */name, AllocatorType allocatorType, @@ -576,6 +572,31 @@ public enum MemoryType public delegate void DOrtReleaseMemoryInfo(IntPtr /*(OrtMemoryInfo*)*/ allocatorInfo); public static DOrtReleaseMemoryInfo OrtReleaseMemoryInfo; + public delegate IntPtr /*(OrtStatus*)*/ DOrtCompareMemoryInfo( + IntPtr /*(const OrtMemoryInfo*)*/ info1, + IntPtr /*(const OrtMemoryInfo*)*/ info2, + out int /*(int* out)*/ result); + public static DOrtCompareMemoryInfo OrtCompareMemoryInfo; + /** + * Do not free the returned value + */ + public delegate IntPtr /*(OrtStatus*)*/ DOrtMemoryInfoGetName(IntPtr /*(const OrtMemoryInfo* ptr)*/ mem_info, out IntPtr /*(const char**)*/ name); + public static DOrtMemoryInfoGetName OrtMemoryInfoGetName; + + public delegate IntPtr /*(OrtStatus*)*/ DOrtMemoryInfoGetId(IntPtr /*(const OrtMemoryInfo* ptr)*/ mem_info, out int /*(int* out)*/ id); + public static DOrtMemoryInfoGetId OrtMemoryInfoGetId; + + public delegate IntPtr /*(OrtStatus*)*/ DOrtMemoryInfoGetMemType( + IntPtr /*(const OrtMemoryInfo* ptr)*/ mem_info, + out MemoryType /*(OrtMemType*)*/ mem_type); + public static DOrtMemoryInfoGetMemType OrtMemoryInfoGetMemType; + + public delegate IntPtr /*(OrtStatus*)*/ DOrtMemoryInfoGetType( + IntPtr /*(const OrtMemoryInfo* ptr)*/ mem_info, + out AllocatorType /*(OrtAllocatorType*)*/ alloc_type + ); + public static DOrtMemoryInfoGetType OrtMemoryInfoGetType; + public delegate IntPtr /*(OrtStatus*)*/DOrtGetAllocatorWithDefaultOptions(out IntPtr /*(OrtAllocator**)*/ allocator); public static DOrtGetAllocatorWithDefaultOptions OrtGetAllocatorWithDefaultOptions; diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs index 785fb7be4659d..bc2aff92fa6d0 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs @@ -1,7 +1,9 @@ -using Microsoft.ML.OnnxRuntime.Tensors; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.ML.OnnxRuntime.Tensors; using System; using System.Buffers; -using System.Collections.Generic; using System.Diagnostics; using System.Runtime.InteropServices; using System.Text; @@ -143,7 +145,7 @@ internal static void CreateNativeOnnxValue(Object value, out IntPtr onnxValue, o } IntPtr status = NativeMethods.OrtCreateTensorWithDataAsOrtValue( - NativeMemoryInfo.DefaultInstance.Handle, + MemoryInfo.DefaultInstance.Handle, dataBufferPointer, (UIntPtr)(dataBufferLength), longShape, From 10db4f6fcaeaadbad7248a1df34608b17a0b85b6 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 14 Jul 2020 10:22:16 -0700 Subject: [PATCH 09/39] Fix unused return value error. 
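
The status returned by OrtSessionOptionsAppendExecutionProvider_CPU and its
CUDA counterpart was being discarded, which trips warn-unused-result builds
and silently swallows failures; every such call is now routed through
Ort::ThrowOnError. A minimal sketch of the pattern, assuming a test
environment like the one in these tests (the model path is a placeholder):

    Ort::SessionOptions session_options;
    // ThrowOnError throws Ort::Exception (and frees the status) when the
    // returned OrtStatus* is non-null; on success it is a no-op.
    Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1));
    Ort::Session session(*ort_env, ORT_TSTR("model.onnx"), session_options);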
--- onnxruntime/test/shared_lib/test_inference.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index eb2e874ff900c..b8a5f3a15cd40 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -392,7 +392,7 @@ TEST(CApiTest, create_session_without_session_option) { TEST(CApiTest, get_allocator_cpu) { Ort::SessionOptions session_options; - OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1); + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1)); Ort::Session session(*ort_env, NAMED_AND_ANON_DIM_PARAM_URI, session_options); Ort::MemoryInfo info_cpu = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemTypeDefault); Ort::Allocator cpu_allocator(session, info_cpu); @@ -408,7 +408,7 @@ TEST(CApiTest, get_allocator_cpu) { #ifdef USE_CUDA TEST(CApiTest, get_allocator_cuda) { Ort::SessionOptions session_options; - OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0); + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(session_options, 0)); Ort::Session session(*ort_env, NAMED_AND_ANON_DIM_PARAM_URI, session_options); Ort::MemoryInfo info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault); From 189300d310c7099f97dd200e29276c4467f405d0 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 14 Jul 2020 11:36:34 -0700 Subject: [PATCH 10/39] Implement MemoryInfo, MemoryAllocator and MemoryAllocation --- .../InferenceSession.cs | 27 +- .../MemoryAllocator.cs | 343 ++++++++++++++++++ .../NativeMemoryAllocator.cs | 218 ----------- .../NativeOnnxValueHelper.cs | 14 +- 4 files changed, 374 insertions(+), 228 deletions(-) create mode 100644 csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs delete mode 100644 csharp/src/Microsoft.ML.OnnxRuntime/NativeMemoryAllocator.cs diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs index 18d7d23ec261f..e0fd0757ae6f2 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs @@ -704,7 +704,7 @@ public string EndProfiling() string str = null; IntPtr status = NativeMethods.OrtSessionEndProfiling(_nativeHandle, - NativeMemoryAllocator.DefaultInstance.Handle, + MemoryAllocator.DefaultInstance.Pointer, out nameHandle); try @@ -716,7 +716,7 @@ public string EndProfiling() { if (nameHandle != IntPtr.Zero) { - NativeMemoryAllocator.DefaultInstance.FreeMemory(nameHandle); + MemoryAllocator.DefaultInstance.FreeMemory(nameHandle); } } @@ -824,7 +824,7 @@ private string GetOutputName(ulong index) IntPtr status = NativeMethods.OrtSessionGetOutputName( _nativeHandle, (UIntPtr)index, - NativeMemoryAllocator.DefaultInstance.Handle, + MemoryAllocator.DefaultInstance.Pointer, out nameHandle); try { @@ -835,7 +835,7 @@ private string GetOutputName(ulong index) { if (nameHandle != IntPtr.Zero) { - NativeMemoryAllocator.DefaultInstance.FreeMemory(nameHandle); + MemoryAllocator.DefaultInstance.FreeMemory(nameHandle); } } @@ -850,7 +850,7 @@ private string GetInputName(ulong index) IntPtr status = NativeMethods.OrtSessionGetInputName( _nativeHandle, (UIntPtr)index, - NativeMemoryAllocator.DefaultInstance.Handle, + MemoryAllocator.DefaultInstance.Pointer, out nameHandle); try { @@ -862,7 +862,7 @@ private string GetInputName(ulong index) { if (nameHandle != 
IntPtr.Zero) { - NativeMemoryAllocator.DefaultInstance.FreeMemory(nameHandle); + MemoryAllocator.DefaultInstance.FreeMemory(nameHandle); } } return str; @@ -876,7 +876,7 @@ private string GetOverridableInitializerName(ulong index) IntPtr status = NativeMethods.OrtSessionGetOverridableInitializerName( _nativeHandle, (UIntPtr)index, - NativeMemoryAllocator.DefaultInstance.Handle, + MemoryAllocator.DefaultInstance.Pointer, out nameHandle); try { @@ -888,7 +888,7 @@ private string GetOverridableInitializerName(ulong index) { if (nameHandle != IntPtr.Zero) { - NativeMemoryAllocator.DefaultInstance.FreeMemory(nameHandle); + MemoryAllocator.DefaultInstance.FreeMemory(nameHandle); } } return str; @@ -996,6 +996,17 @@ internal static NodeMetadata GetMetadataFromTypeInfo(IntPtr typeInfo) return new NodeMetadata(valueType, intDimensions, symbolicDimensions, dotnetType); } + /// + /// Other classes access + /// + internal IntPtr Handle + { + get + { + return _nativeHandle; + } + } + #endregion #region destructors disposers diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs b/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs new file mode 100644 index 0000000000000..d9dd25c9721a7 --- /dev/null +++ b/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs @@ -0,0 +1,343 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using System; +using System.Runtime.InteropServices; +using System.Text; + +namespace Microsoft.ML.OnnxRuntime +{ + /// + /// See documentation for OrtAllocatorType in C API + /// + public enum AllocatorType + { + DeviceAllocator = 0, + ArenaAllocator = 1 + } + + /// + /// See documentation for OrtMemType in C API + /// + public enum MemoryType + { + CpuInput = -2, // Any CPU memory used by non-CPU execution provider + CpuOutput = -1, // CPU accessible memory outputted by non-CPU execution provider, i.e. CUDA_PINNED + Cpu = CpuOutput, // temporary CPU accessible memory allocated by non-CPU execution provider, i.e. CUDA_PINNED + Default = 0, // the default allocator for execution provider + } + + /// + /// This class encapsulates and most of the time owns the underlying native OrtMemoryInfo instance. + /// Instance returned from MemoryAllocator will not own OrtMemoryInfo, the class must be disposed + /// regardless. + /// + /// Use this class to query and create MemoryAllocator instances so you can pre-allocate memory for model + /// inputs/outputs and use it for binding. Instances of the class can also used to created OrtValues bound + /// to pre-allocated memory. In that case, the instance of MemoryInfo contains the information about the allocator + /// used to allocate the underlying memory. 
+    ///
+    public class MemoryInfo : IDisposable
+    {
+        private static readonly Lazy<MemoryInfo> _defaultCpuAllocInfo = new Lazy<MemoryInfo>(CreateCpuMemoryInfo);
+        private IntPtr _pointer;
+        private readonly bool _owned; // false if we are exposing OrtMemoryInfo from an allocator which owns it
+
+        private static MemoryInfo CreateCpuMemoryInfo()
+        {
+            IntPtr allocInfo = IntPtr.Zero;
+            // Returns an OrtMemoryInfo instance that needs to be disposed
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateCpuMemoryInfo(AllocatorType.DeviceAllocator, MemoryType.Cpu, out allocInfo));
+            return new MemoryInfo(allocInfo, true);
+        }
+
+        public static MemoryInfo DefaultInstance
+        {
+            get
+            {
+                return _defaultCpuAllocInfo.Value;
+            }
+        }
+
+        internal IntPtr Pointer
+        {
+            get
+            {
+                return _pointer;
+            }
+        }
+
+        ///
+        /// This constructor takes a native pointer to an already existing
+        /// instance of OrtMemoryInfo. That instance may either be owned or not
+        /// owned. In the latter case, this class serves to expose the native
+        /// properties of the instance.
+        ///
+        internal MemoryInfo(IntPtr allocInfo, bool owned)
+        {
+            _pointer = allocInfo;
+            _owned = owned;
+        }
+
+        // Predefined utf8 encoded allocator names. Use them to construct an instance of
+        // MemoryInfo
+        public static readonly byte[] CPU_allocator = Encoding.UTF8.GetBytes("Cpu" + '\0');
+        public static readonly byte[] CUDA_allocator = Encoding.UTF8.GetBytes("Cuda" + '\0');
+        public static readonly byte[] CUDA_PINNED_allocator = Encoding.UTF8.GetBytes("CudaPinned" + '\0');
+        ///
+        /// Create an instance of MemoryInfo according to the specification.
+        /// Memory info instances are usually used to get a handle of a native allocator
+        /// that is present within the current inference session object. That, in turn, depends
+        /// on which execution providers are available within the binary that you are using and are
+        /// registered with the AppendExecutionProvider methods.
+        ///
+        /// Allocator name. Use one of the predefined names above.
+        /// Allocator type
+        /// Device id
+        /// Memory type
+        public MemoryInfo(byte[] utf8_allocator_name, AllocatorType alloc_type, int device_id, MemoryType mem_type)
+        {
+            var pinned_name = GCHandle.Alloc(utf8_allocator_name, GCHandleType.Pinned);
+            try
+            {
+                NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateMemoryInfo(pinned_name.AddrOfPinnedObject(),
+                                                                                alloc_type,
+                                                                                device_id,
+                                                                                mem_type,
+                                                                                out _pointer));
+            }
+            finally
+            {
+                pinned_name.Free();
+            }
+            _owned = true;
+        }
+
+        ///
+        /// Create an instance of MemoryInfo according to the specification.
+        ///
+        /// Allocator name
+        /// Allocator type
+        /// Device id
+        /// Memory type
+        public MemoryInfo(string allocator_name, AllocatorType alloc_type, int device_id, MemoryType mem_type)
+            : this(Encoding.UTF8.GetBytes(allocator_name + '\0'), alloc_type, device_id, mem_type)
+        {
+        }
+
+        public string Name
+        {
+            get
+            {
+                IntPtr utf8_name = IntPtr.Zero;
+                NativeApiStatus.VerifySuccess(NativeMethods.OrtMemoryInfoGetName(_pointer, out utf8_name));
+                return NativeOnnxValueHelper.StringFromNativeUtf8(utf8_name);
+            }
+        }
+
+        public int Id
+        {
+            get
+            {
+                int id = 0;
+                NativeApiStatus.VerifySuccess(NativeMethods.OrtMemoryInfoGetId(_pointer, out id));
+                return id;
+            }
+        }
+
+        ///
+        /// The following two accessors are conceptually properties, but naming them
+        /// as such would conflict with the types they return. They also make native
+        /// calls, so exposing them as Get() methods is appropriate.
+        ///
+        public MemoryType GetMemoryType()
+        {
+            MemoryType mem_type = MemoryType.Default;
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtMemoryInfoGetMemType(_pointer, out mem_type));
+            return mem_type;
+        }
+
+        public AllocatorType GetAllocatorType()
+        {
+            AllocatorType alloc_type = AllocatorType.ArenaAllocator;
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtMemoryInfoGetType(_pointer, out alloc_type));
+            return alloc_type;
+        }
+
+        public bool CompareMemoryInfo(MemoryInfo other)
+        {
+            int result = -1;
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtCompareMemoryInfo(_pointer, other._pointer, out result));
+            return (result == 0);
+        }
+
+        protected virtual void Dispose(bool disposing)
+        {
+            if (disposing && _owned)
+            {
+                NativeMethods.OrtReleaseMemoryInfo(_pointer);
+            }
+        }
+
+        public void Dispose()
+        {
+            Dispose(true);
+            GC.SuppressFinalize(this);
+        }
+        // We intentionally do not provide a finalizer for the class
+    }
+
+    ///
+    /// This class represents a memory allocation made by a specific onnxruntime
+    /// allocator. Use MemoryAllocator.Allocate() to obtain an instance of this class.
+    /// It implements IDisposable and makes use of the original allocator
+    /// used to allocate the memory. The lifespan of the allocator instance must eclipse the
+    /// lifespan of the allocation. Or, if you prefer, all MemoryAllocation instances must be
+    /// disposed of before the corresponding allocator instances are disposed of.
+    ///
+    public class MemoryAllocation : IDisposable
+    {
+        private MemoryAllocator _allocator;
+        private IntPtr _pointer;
+        private int _size;
+
+        internal MemoryAllocation(MemoryAllocator allocator, IntPtr pointer, int size)
+        {
+            _allocator = allocator;
+            _pointer = pointer;
+            _size = size;
+        }
+
+        internal IntPtr Pointer
+        {
+            get
+            {
+                return _pointer;
+            }
+        }
+
+        ///
+        /// Returns the size of the allocation
+        ///
+        public int Size
+        {
+            get
+            {
+                return _size;
+            }
+        }
+
+        protected virtual void Dispose(bool disposing)
+        {
+            if (disposing)
+            {
+                _allocator.FreeMemory(_pointer);
+            }
+        }
+
+        public void Dispose()
+        {
+            Dispose(true);
+            GC.SuppressFinalize(this);
+        }
+    }
+
+    public class MemoryAllocator : IDisposable
+    {
+        private static readonly Lazy<MemoryAllocator> _defaultInstance = new Lazy<MemoryAllocator>(GetDefaultCpuAllocator);
+        private IntPtr _pointer;
+        private readonly bool _owned;
+
+        private static MemoryAllocator GetDefaultCpuAllocator()
+        {
+            IntPtr allocator = IntPtr.Zero;
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtGetAllocatorWithDefaultOptions(out allocator));
+            // The default cpu allocator is a native singleton; do not dispose of it
+            return new MemoryAllocator(allocator, false);
+        }
+
+        public static MemoryAllocator DefaultInstance // May throw an exception on access if the factory above has thrown
+        {
+            get
+            {
+                return _defaultInstance.Value;
+            }
+        }
+
+        internal IntPtr Pointer
+        {
+            get
+            {
+                return _pointer;
+            }
+        }
+
+        ///
+        /// Internal constructor that wraps an existing native allocator
+        ///
+        internal MemoryAllocator(IntPtr allocator, bool owned)
+        {
+            this._pointer = allocator;
+            this._owned = owned;
+        }
+
+        public MemoryAllocator(InferenceSession session, MemoryInfo memInfo)
+        {
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateAllocator(session.Handle, memInfo.Pointer, out _pointer));
+            _owned = true;
+        }
+
+        public MemoryInfo Info
+        {
+            get
+            {
+                IntPtr mem_info = IntPtr.Zero;
+                NativeApiStatus.VerifySuccess(NativeMethods.OrtAllocatorGetInfo(_pointer, out mem_info));
+                // This serves as an exposure of the memory_info owned by the
allocator + return new MemoryInfo(mem_info, false); + } + } + + /// + /// Allocate native memory + /// + /// + /// + public MemoryAllocation Allocate(int size) + { + IntPtr allocation = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtAllocatorAlloc(_pointer, (UIntPtr)size, out allocation)); + return new MemoryAllocation(this, allocation, size); + } + + /// + /// This internal interface is used for freeing memory + /// + /// + internal void FreeMemory(IntPtr allocation) + { + NativeApiStatus.VerifySuccess(NativeMethods.OrtAllocatorFree(_pointer, allocation)); + } + + protected virtual void Dispose(bool disposing) + { + if (disposing && _owned) + { + NativeMethods.OrtReleaseAllocator(_pointer); + } + } + + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + // We intentionally do not provider an finalizer for the class + } +} diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMemoryAllocator.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMemoryAllocator.cs deleted file mode 100644 index c951edf880015..0000000000000 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMemoryAllocator.cs +++ /dev/null @@ -1,218 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. - -using System; -using System.Runtime.InteropServices; -using System.Text; - -namespace Microsoft.ML.OnnxRuntime -{ - /// - /// See documentation for OrtAllocatorType in C API - /// - public enum AllocatorType - { - DeviceAllocator = 0, - ArenaAllocator = 1 - } - - /// - /// See documentation for OrtMemType in C API - /// - public enum MemoryType - { - CpuInput = -2, // Any CPU memory used by non-CPU execution provider - CpuOutput = -1, // CPU accessible memory outputted by non-CPU execution provider, i.e. CUDA_PINNED - Cpu = CpuOutput, // temporary CPU accessible memory allocated by non-CPU execution provider, i.e. CUDA_PINNED - Default = 0, // the default allocator for execution provider - } - - /// - /// This class encapsulates and most of the time owns the underlying native OrtMemoryInfo instance. - /// The only exception is when it is returned from the allocator, then the allocator owns the actual - /// native instance. - /// - /// Use this class to query and create MemoryAllocator instances so you can pre-allocate memory for model - /// inputs/outputs and use it for binding - /// - public class MemoryInfo : SafeHandle - { - private static readonly Lazy _defaultCpuAllocInfo = new Lazy(CreateCpuMemoryInfo); - private readonly bool _owned; // false if we are exposing OrtMemoryInfo from an allocator which owns it - - private static MemoryInfo CreateCpuMemoryInfo() - { - IntPtr allocInfo = IntPtr.Zero; - NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateCpuMemoryInfo(AllocatorType.DeviceAllocator, MemoryType.Cpu, out allocInfo)); - return new MemoryInfo(allocInfo, true); - } - - public static MemoryInfo DefaultInstance - { - get - { - return _defaultCpuAllocInfo.Value; - } - } - - internal IntPtr Handle // May throw exception in every access, if the constructor have thrown an exception - { - get - { - return handle; - } - } - - public override bool IsInvalid - { - get - { - return (handle == IntPtr.Zero); - } - } - - /// - /// Default instance construction - /// - /// - internal MemoryInfo(IntPtr allocInfo, bool p_owned) - : base(IntPtr.Zero, true) //set 0 as invalid pointer - { - handle = allocInfo; - _owned = p_owned; - } - - // Predefined utf8 encoded allocator names. 
Use them to construct an instance of - // MemoryInfo - public static readonly byte[] CPU_allocator = Encoding.UTF8.GetBytes("Cpu" + '\0'); - public static readonly byte[] CUDA_allocator = Encoding.UTF8.GetBytes("Cuda" + '\0'); - public static readonly byte[] CUDA_PINNED_allocator = Encoding.UTF8.GetBytes("CudaPinned" + '\0'); - /// - /// Create an instance of MemoryInfo according to the specification - /// Memory info instances are usually used to get a handle of a native allocator - /// that is present within the current inference session object. That, in turn, depends - /// of what execution providers are available within the binary that you are using and are - /// registered with Add methods. - /// - /// Allocator name. Use of the predefined above. - /// Allocator type - /// Device id - /// Memory type - public MemoryInfo(byte[] utf8_allocator_name, AllocatorType alloc_type, int device_id, MemoryType mem_type) - : base(IntPtr.Zero, true) //set 0 as invalid pointer - { - var pinned_bytes = GCHandle.Alloc(utf8_allocator_name, GCHandleType.Pinned); - try - { - NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateMemoryInfo(pinned_bytes.AddrOfPinnedObject(), - alloc_type, - device_id, - mem_type, - out handle)); - } - finally - { - pinned_bytes.Free(); - } - _owned = true; - } - - /// - /// Create an instance of MemoryInfo according to the specification. - /// - /// Allocator name - /// Allocator type - /// Device id - /// Memory type - public MemoryInfo(string allocator_name, AllocatorType alloc_type, int device_id, MemoryType mem_type) - : this(Encoding.UTF8.GetBytes(allocator_name + '\0'), alloc_type, device_id, mem_type) - { - } - - public string Name - { - get - { - IntPtr utf8_name = IntPtr.Zero; - NativeApiStatus.VerifySuccess(NativeMethods.OrtMemoryInfoGetName(handle, out utf8_name)); - // Encoding.UTF8.GetString() - // Marshal.PtrTo - } - } - - protected override bool ReleaseHandle() - { - if (_owned) - { - NativeMethods.OrtReleaseMemoryInfo(handle); - } - return true; - } - } - - internal class NativeMemoryAllocator : SafeHandle - { - protected static readonly Lazy _defaultInstance = new Lazy(GetDefaultCpuAllocator); - - private static NativeMemoryAllocator GetDefaultCpuAllocator() - { - IntPtr allocator = IntPtr.Zero; - try - { - IntPtr status = NativeMethods.OrtGetAllocatorWithDefaultOptions(out allocator); - NativeApiStatus.VerifySuccess(status); - } - catch (Exception e) - { - throw e; - } - - return new NativeMemoryAllocator(allocator); - } - - static internal NativeMemoryAllocator DefaultInstance // May throw exception in every access, if the constructor have thrown an exception - { - get - { - return _defaultInstance.Value; - } - } - - /// - /// Releases native memory previously allocated by the allocator - /// - /// - internal void FreeMemory(IntPtr memory) - { - NativeMethods.OrtAllocatorFree(handle, memory); - } - - public override bool IsInvalid - { - get - { - return (this.handle == IntPtr.Zero); - } - } - - internal IntPtr Handle - { - get - { - return handle; - } - } - - private NativeMemoryAllocator(IntPtr allocator) - : base(IntPtr.Zero, true) - { - this.handle = allocator; - } - - protected override bool ReleaseHandle() - { - return true; - } - } - -} diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs index bc2aff92fa6d0..ff754b0fe1be3 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs @@ 
-145,7 +145,7 @@ internal static void CreateNativeOnnxValue(Object value, out IntPtr onnxValue, o } IntPtr status = NativeMethods.OrtCreateTensorWithDataAsOrtValue( - MemoryInfo.DefaultInstance.Handle, + MemoryInfo.DefaultInstance.Pointer, dataBufferPointer, (UIntPtr)(dataBufferLength), longShape, @@ -185,7 +185,7 @@ out onnxValue try { NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateTensorAsOrtValue( - NativeMemoryAllocator.DefaultInstance.Handle, + MemoryAllocator.DefaultInstance.Pointer, longShape, (UIntPtr)(longShape.Length), TensorElementType.String, @@ -353,6 +353,16 @@ private static bool TryPinAsTensor( return false; } + + internal static string StringFromNativeUtf8(IntPtr nativeUtf8) + { + // .NET 5.0 has Marshal.PtrToStringUTF8 that does the below + int len = 0; + while (Marshal.ReadByte(nativeUtf8, len) != 0) ++len; + byte[] buffer = new byte[len]; + Marshal.Copy(nativeUtf8, buffer, 0, buffer.Length); + return Encoding.UTF8.GetString(buffer, 0, buffer.Length); + } } internal enum TensorElementType From a243331be0a823d7113990c91a06b1f8e86a39a7 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 14 Jul 2020 16:17:19 -0700 Subject: [PATCH 11/39] Add tests and make them run. --- .../MemoryAllocator.cs | 14 ++- .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 3 +- .../InferenceTest.cs | 95 +++++++++++++++++-- 3 files changed, 104 insertions(+), 8 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs b/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs index d9dd25c9721a7..296618d852c39 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs @@ -172,7 +172,7 @@ public bool CompareMemoryInfo(MemoryInfo other) NativeApiStatus.VerifySuccess(NativeMethods.OrtCompareMemoryInfo(_pointer, other._pointer, out result)); return (result == 0); } - + #region IDisposable Support protected virtual void Dispose(bool disposing) { if (disposing && _owned) @@ -187,6 +187,7 @@ public void Dispose() GC.SuppressFinalize(this); } // We intentionally do not provider an finalizer for the class + #endregion } /// @@ -229,6 +230,14 @@ public int Size } } + public MemoryInfo Info + { + get + { + return _allocator.Info; + } + } + #region IDisposable Support protected virtual void Dispose(bool disposing) { if (disposing) @@ -242,6 +251,7 @@ public void Dispose() Dispose(true); GC.SuppressFinalize(this); } + #endregion } public class MemoryAllocator : IDisposable @@ -324,6 +334,7 @@ internal void FreeMemory(IntPtr allocation) NativeApiStatus.VerifySuccess(NativeMethods.OrtAllocatorFree(_pointer, allocation)); } + #region IDisposable Support protected virtual void Dispose(bool disposing) { if (disposing && _owned) @@ -339,5 +350,6 @@ public void Dispose() } // We intentionally do not provider an finalizer for the class + #endregion } } diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index 8c1af79d9d38d..9e7175b7aa0ef 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -244,7 +244,8 @@ static NativeMethods() OrtCompareMemoryInfo = (DOrtCompareMemoryInfo)Marshal.GetDelegateForFunctionPointer(api_.CompareMemoryInfo, typeof(DOrtCompareMemoryInfo)); OrtMemoryInfoGetName = (DOrtMemoryInfoGetName)Marshal.GetDelegateForFunctionPointer(api_.MemoryInfoGetName, typeof(DOrtMemoryInfoGetName)); OrtMemoryInfoGetId = 
(DOrtMemoryInfoGetId)Marshal.GetDelegateForFunctionPointer(api_.MemoryInfoGetId, typeof(DOrtMemoryInfoGetId)); - OrtMemoryInfoGetMemType = (DOrtMemoryInfoGetMemType)Marshal.GetDelegateForFunctionPointer(api_.MemoryInfoGetType, typeof(DOrtMemoryInfoGetMemType)); + OrtMemoryInfoGetMemType = (DOrtMemoryInfoGetMemType)Marshal.GetDelegateForFunctionPointer(api_.MemoryInfoGetMemType, typeof(DOrtMemoryInfoGetMemType)); + OrtMemoryInfoGetType = (DOrtMemoryInfoGetType)Marshal.GetDelegateForFunctionPointer(api_.MemoryInfoGetType, typeof(DOrtMemoryInfoGetType)); OrtGetAllocatorWithDefaultOptions = (DOrtGetAllocatorWithDefaultOptions)Marshal.GetDelegateForFunctionPointer(api_.GetAllocatorWithDefaultOptions, typeof(DOrtGetAllocatorWithDefaultOptions)); OrtCreateAllocator = (DOrtCreateAllocator)Marshal.GetDelegateForFunctionPointer(api_.CreateAllocator, typeof(DOrtCreateAllocator)); OrtReleaseAllocator = (DOrtReleaseAllocator)Marshal.GetDelegateForFunctionPointer(api_.ReleaseAllocator, typeof(DOrtReleaseAllocator)); diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index c91497b21102a..7f36a2777f1bb 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -1604,12 +1604,16 @@ private void TestModelSerialization() string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); string modelOutputPath = Path.Combine(Directory.GetCurrentDirectory(), "optimized-squeezenet.onnx"); // Set the optimized model file path to assert that no exception are thrown. - SessionOptions options = new SessionOptions(); - options.OptimizedModelFilePath = modelOutputPath; - options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_BASIC; - var session = new InferenceSession(modelPath, options); - Assert.NotNull(session); - Assert.True(File.Exists(modelOutputPath)); + using (SessionOptions options = new SessionOptions()) + { + options.OptimizedModelFilePath = modelOutputPath; + options.GraphOptimizationLevel = GraphOptimizationLevel.ORT_ENABLE_BASIC; + using (var session = new InferenceSession(modelPath, options)) + { + Assert.NotNull(session); + Assert.True(File.Exists(modelOutputPath)); + } + } } [GpuFact] @@ -1653,6 +1657,85 @@ private void TestInferenceSessionWithByteArray() } } + void TestCPUAllocator(InferenceSession session) + { + int device_id = 0; + using (var info_cpu = new MemoryInfo(MemoryInfo.CPU_allocator, AllocatorType.ArenaAllocator, device_id, MemoryType.Default)) + { + Assert.NotEqual(info_cpu.Pointer, IntPtr.Zero); + Assert.Equal("Cpu", info_cpu.Name); + Assert.Equal(device_id, info_cpu.Id); + Assert.Equal(AllocatorType.ArenaAllocator, info_cpu.GetAllocatorType()); + Assert.Equal(MemoryType.Default, info_cpu.GetMemoryType()); + + using (var allocator = new MemoryAllocator(session, info_cpu)) + { + Assert.NotEqual(allocator.Pointer, IntPtr.Zero); + var alloc_info = allocator.Info; + Assert.True(info_cpu.CompareMemoryInfo(alloc_info)); + + int size = 1024; + MemoryAllocation chunk = allocator.Allocate(size); + Assert.NotEqual(chunk.Pointer, IntPtr.Zero); + Assert.Equal(chunk.Size, size); + Assert.True(chunk.Info.CompareMemoryInfo(alloc_info)); + chunk.Dispose(); + alloc_info.Dispose(); + } + } + } + +#if USE_CUDA + void TestCUDAAllocator(InferenceSession session) + { + int device_id = 0; + using (var info_cuda = new MemoryInfo(MemoryInfo.CUDA_allocator, AllocatorType.ArenaAllocator, device_id, MemoryType.Default)) 
+ { + Assert.NotEqual(info_cuda.Pointer, IntPtr.Zero); + Assert.Equal("Cuda", info_cuda.Name); + Assert.Equal(device_id, info_cuda.Id); + Assert.Equal(AllocatorType.ArenaAllocator, info_cuda.GetAllocatorType()); + Assert.Equal(MemoryType.Default, info_cuda.GetMemoryType()); + + using (var allocator = new MemoryAllocator(session, info_cuda)) + { + Assert.NotEqual(allocator.Pointer, IntPtr.Zero); + var alloc_info = allocator.Info; + Assert.True(info_cuda.CompareMemoryInfo(alloc_info)); + + int size = 1024; + MemoryAllocation chunk = allocator.Allocate(size); + Assert.NotEqual(chunk.Pointer, IntPtr.Zero); + Assert.Equal(chunk.Size, size); + Assert.True(chunk.Info.CompareMemoryInfo(alloc_info)); + chunk.Dispose(); + alloc_info.Dispose(); + } + } + } +#endif + + [Fact] + private void TestAllocator() + { + string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); + // Set the optimized model file path to assert that no exception are thrown. + using (SessionOptions options = new SessionOptions()) + { + options.AppendExecutionProvider_CPU(1); +#if USE_CUDA + options.AppendExecutionProvider_CUDA(0); +#endif + using (var session = new InferenceSession(modelPath, options)) + { + TestCPUAllocator(session); +#if USE_CUDA + TestCUDAAllocator(session); +#endif + } + } + } + [DllImport("kernel32", SetLastError = true)] static extern IntPtr LoadLibrary(string lpFileName); From 90e1571a6b350893299cd250734b5bde213690c8 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 15 Jul 2020 18:47:00 -0700 Subject: [PATCH 12/39] Introduce IOBinding and OrtValue --- .../InferenceSession.cs | 58 ++++++------ .../src/Microsoft.ML.OnnxRuntime/IoBinding.cs | 89 +++++++++++++++++++ .../MemoryAllocator.cs | 80 ++++++++++------- .../NativeOnnxValueHelper.cs | 43 ++++++++- .../src/Microsoft.ML.OnnxRuntime/OrtValue.cs | 46 ++++++++++ .../InferenceTest.cs | 4 +- 6 files changed, 257 insertions(+), 63 deletions(-) create mode 100644 csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs create mode 100644 csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs index e0fd0757ae6f2..981015dd0c65b 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs @@ -695,6 +695,16 @@ public void Run( } } + public IoBinding CreateIOBinding() + { + return new IoBinding(this); + } + + public void Run(RunOptions runOptions, IoBinding ioBinding) + { + NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle)); + } + /// /// Ends profiling for the session. Returns the profile file name. 
/// @@ -703,14 +713,12 @@ public string EndProfiling() IntPtr nameHandle = IntPtr.Zero; string str = null; - IntPtr status = NativeMethods.OrtSessionEndProfiling(_nativeHandle, - MemoryAllocator.DefaultInstance.Pointer, - out nameHandle); - try { - NativeApiStatus.VerifySuccess(status); - str = Marshal.PtrToStringAnsi(nameHandle); + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionEndProfiling(_nativeHandle, + MemoryAllocator.DefaultInstance.Pointer, + out nameHandle)); + str = NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle); } finally { @@ -821,15 +829,15 @@ private string GetOutputName(ulong index) IntPtr nameHandle = IntPtr.Zero; string str = null; - IntPtr status = NativeMethods.OrtSessionGetOutputName( - _nativeHandle, - (UIntPtr)index, - MemoryAllocator.DefaultInstance.Pointer, - out nameHandle); try { - NativeApiStatus.VerifySuccess(status); - str = Marshal.PtrToStringAnsi(nameHandle); //assumes charset = ANSI + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetOutputName( + _nativeHandle, + (UIntPtr)index, + MemoryAllocator.DefaultInstance.Pointer, + out nameHandle)); + + str = NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle); } finally { @@ -847,16 +855,15 @@ private string GetInputName(ulong index) IntPtr nameHandle = IntPtr.Zero; string str = null; - IntPtr status = NativeMethods.OrtSessionGetInputName( - _nativeHandle, - (UIntPtr)index, - MemoryAllocator.DefaultInstance.Pointer, - out nameHandle); try { + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetInputName( + _nativeHandle, + (UIntPtr)index, + MemoryAllocator.DefaultInstance.Pointer, + out nameHandle)); - NativeApiStatus.VerifySuccess(status); - str = Marshal.PtrToStringAnsi(nameHandle); //assumes charset = ANSI + str = NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle); } finally { @@ -873,16 +880,15 @@ private string GetOverridableInitializerName(ulong index) IntPtr nameHandle = IntPtr.Zero; string str = null; - IntPtr status = NativeMethods.OrtSessionGetOverridableInitializerName( + try + { + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetOverridableInitializerName( _nativeHandle, (UIntPtr)index, MemoryAllocator.DefaultInstance.Pointer, - out nameHandle); - try - { + out nameHandle)); - NativeApiStatus.VerifySuccess(status); - str = Marshal.PtrToStringAnsi(nameHandle); //assumes charset = ANSI + str = NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle); } finally { diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs b/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs new file mode 100644 index 0000000000000..550bb3a94a5d1 --- /dev/null +++ b/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs @@ -0,0 +1,89 @@ +using System; +using System.Buffers; +using System.Collections.Generic; +using System.Runtime.InteropServices; +using System.Text; + +namespace Microsoft.ML.OnnxRuntime +{ + /// + /// This class enable to bind inputs and outputs to pre-allocated + /// memory. This enables interesting scenarios. For example, if your input + /// already resides in some pre-allocated memory even if on a device you bind + /// that piece of memory to an input name and shape and onnxruntime will use that as input. 
+ /// Other traditional inputs can also be bound that already exists as Tensors + /// + public class IoBinding : IDisposable + { + private IntPtr _handle; + + internal IoBinding(InferenceSession session) + { + NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateIoBinding(session.Handle, out _handle)); + } + + internal IntPtr Handle + { + get + { + return _handle; + } + } + + public void BindInput(string name, MemoryAllocation allocation) + { + } + + public void BindInputs(ReadOnlySequence names, ReadOnlySequence allocations) + { + if(names.IsEmpty || names.Length != allocations.Length) + { + throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Names and Allocations must be of equal length"); + } + + using (var ortValues = new DisposableList()) + using (var pinnedNames = new DisposableList()) + { + for(int i = 0; i < names.Length; ++i) + { + var first = names.Start; + ReadOnlyMemory name; + bool next = true; + //while(next) + //{ + // names.TryGet(ref first, out name, true); + //MemoryHandle pinned_utf8 = new Memory(Encoding.UTF8.GetBytes(string.Concat(name, '\0'))).Pin(); + //pinnedNames.Add(pinned_utf8); + } + } + } + + public void ClearBoundInputs() + { + NativeMethods.OrtClearBoundInputs(_handle); + } + + public void ClearBoundOutputs() + { + NativeMethods.OrtClearBoundOutputs(_handle); + } + + #region Disposable Support + protected virtual void Dispose(bool disposing) + { + if(disposing) + { + NativeMethods.OrtReleaseIoBinding(_handle); + _handle = IntPtr.Zero; + } + } + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + + // No need for the finalizer + #endregion + } +} diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs b/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs index 296618d852c39..5ffe5749129fc 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs @@ -98,19 +98,14 @@ internal MemoryInfo(IntPtr allocInfo, bool owned) /// Memory type public MemoryInfo(byte[] utf8_allocator_name, AllocatorType alloc_type, int device_id, MemoryType mem_type) { - var pinned_name = GCHandle.Alloc(utf8_allocator_name, GCHandleType.Pinned); - try + using (var pinned_handle = new PinnedGCHandle(GCHandle.Alloc(utf8_allocator_name, GCHandleType.Pinned))) { - NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateMemoryInfo(pinned_name.AddrOfPinnedObject(), + NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateMemoryInfo(pinned_handle.Pointer, alloc_type, device_id, mem_type, out _pointer)); } - finally - { - pinned_name.Free(); - } _owned = true; } @@ -175,9 +170,13 @@ public bool CompareMemoryInfo(MemoryInfo other) #region IDisposable Support protected virtual void Dispose(bool disposing) { - if (disposing && _owned) + if (disposing) { - NativeMethods.OrtReleaseMemoryInfo(_pointer); + if (_owned) + { + NativeMethods.OrtReleaseMemoryInfo(_pointer); + } + _pointer = IntPtr.Zero; } } @@ -201,34 +200,45 @@ public void Dispose() public class MemoryAllocation : IDisposable { private MemoryAllocator _allocator; - private IntPtr _pointer; - private int _size; - internal MemoryAllocation(MemoryAllocator allocator, IntPtr pointer, int size) + /// + /// Bind an arbitrary piece of native memory to the instance + /// The instance will not have the ownership of this memory. 
+ /// + /// + /// + public MemoryAllocation(IntPtr pointer, uint size) { - _allocator = allocator; - _pointer = pointer; - _size = size; + _allocator = null; + Pointer = pointer; + Size = size; } - internal IntPtr Pointer + /// + /// This an instance with a piece of memory allocated + /// by onnxruntime MemoryAllocator. The same allocator will be used for + /// for memory disposal. For memory allocated elsewhere, the instance will not own the memory + /// and will not dispose of it. + /// + /// + /// + /// + internal MemoryAllocation(MemoryAllocator allocator, IntPtr pointer, uint size) { - get - { - return _pointer; - } + _allocator = allocator; + Pointer = pointer; + Size = size; } + /// + /// Internal accessor to call native methods + /// + internal IntPtr Pointer { get; private set; } + /// /// Returns the size of the allocation /// - public int Size - { - get - { - return _size; - } - } + public uint Size { get; private set; } public MemoryInfo Info { @@ -242,7 +252,11 @@ protected virtual void Dispose(bool disposing) { if (disposing) { - _allocator.FreeMemory(_pointer); + if (_allocator != null) + { + _allocator.FreeMemory(Pointer); + } + Pointer = IntPtr.Zero; } } @@ -318,7 +332,7 @@ public MemoryInfo Info /// /// /// - public MemoryAllocation Allocate(int size) + public MemoryAllocation Allocate(uint size) { IntPtr allocation = IntPtr.Zero; NativeApiStatus.VerifySuccess(NativeMethods.OrtAllocatorAlloc(_pointer, (UIntPtr)size, out allocation)); @@ -337,9 +351,13 @@ internal void FreeMemory(IntPtr allocation) #region IDisposable Support protected virtual void Dispose(bool disposing) { - if (disposing && _owned) + if (disposing) { - NativeMethods.OrtReleaseAllocator(_pointer); + if (_owned) + { + NativeMethods.OrtReleaseAllocator(_pointer); + } + _pointer = IntPtr.Zero; } } diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs index ff754b0fe1be3..010a8f07ea7d3 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs @@ -10,10 +10,45 @@ namespace Microsoft.ML.OnnxRuntime { - /// - /// This helper class contains methods to create native OrtValue from a managed value object - /// - internal static class NativeOnnxValueHelper + internal class PinnedGCHandle : IDisposable + { + private GCHandle _handle; + + public PinnedGCHandle(GCHandle handle) + { + _handle = handle; + } + + public IntPtr Pointer + { + get + { + return _handle.AddrOfPinnedObject(); + } + } + + #region Disposable Support + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + _handle.Free(); + } + } + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + // No need for the finalizer + // If this is not disposed timely GC can't help us + #endregion + } + +/// +/// This helper class contains methods to create native OrtValue from a managed value object +/// +internal static class NativeOnnxValueHelper { /// /// Attempts to Pin the buffer, and create a native OnnxValue out of it. the pinned MemoryHandle is passed to output. 
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs new file mode 100644 index 0000000000000..8acad3a8e4de5 --- /dev/null +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs @@ -0,0 +1,46 @@ +using System; +using System.Collections.Generic; +using System.Text; + +namespace Microsoft.ML.OnnxRuntime +{ + /// + /// Represents a disposable OrtValue + /// If necessary maybe made public and more + /// functionality added. Right now it is disposable + /// + internal class OrtValue : IDisposable + { + private IntPtr _handle; + + internal OrtValue(IntPtr handle) + { + _handle = handle; + } + + internal IntPtr Handle + { + get + { + return _handle; + } + } + + #region Disposable Support + protected virtual void Dispose(bool disposing) + { + if (disposing) + { + NativeMethods.OrtReleaseValue(_handle); + _handle = IntPtr.Zero; + } + } + public void Dispose() + { + Dispose(true); + GC.SuppressFinalize(this); + } + // No need for the finalizer + #endregion + } +} diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index 7f36a2777f1bb..d3a7cfa860fd6 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -1674,7 +1674,7 @@ void TestCPUAllocator(InferenceSession session) var alloc_info = allocator.Info; Assert.True(info_cpu.CompareMemoryInfo(alloc_info)); - int size = 1024; + uint size = 1024; MemoryAllocation chunk = allocator.Allocate(size); Assert.NotEqual(chunk.Pointer, IntPtr.Zero); Assert.Equal(chunk.Size, size); @@ -1703,7 +1703,7 @@ void TestCUDAAllocator(InferenceSession session) var alloc_info = allocator.Info; Assert.True(info_cuda.CompareMemoryInfo(alloc_info)); - int size = 1024; + uint size = 1024; MemoryAllocation chunk = allocator.Allocate(size); Assert.NotEqual(chunk.Pointer, IntPtr.Zero); Assert.Equal(chunk.Size, size); From 618486d1f7bac4e0dcd4166e65f747cd14c03d8a Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 16 Jul 2020 16:22:29 -0700 Subject: [PATCH 13/39] Implement IoBinding and some auxiallry functionality --- .../InferenceSession.cs | 4 +- .../src/Microsoft.ML.OnnxRuntime/IoBinding.cs | 54 ++++++++---- .../MemoryAllocator.cs | 2 +- .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 4 +- .../NativeOnnxTensorMemory.cs | 6 +- .../NativeOnnxValueHelper.cs | 88 ++++++++----------- .../src/Microsoft.ML.OnnxRuntime/OrtValue.cs | 46 +++++++++- .../Tensors/ArrayUtilities.cs | 14 +++ .../Tensors/Tensor.cs | 24 +++++ 9 files changed, 166 insertions(+), 76 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs index 981015dd0c65b..9445a485bdf14 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs @@ -6,7 +6,7 @@ using System.Collections.Generic; using System.IO; using System.Linq; - +using Microsoft.ML.OnnxRuntime.Tensors; namespace Microsoft.ML.OnnxRuntime { @@ -364,7 +364,7 @@ public void Run( int outputIndex = 0; foreach (var output in outputValues) { - if (output.ElementType == TensorElementType.String) + if (output.ElementType == Tensors.TensorElementType.String) { throw new NotSupportedException("Using string type FixedBufferOnnxValue in outputs is not supported."); } diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs b/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs index 
550bb3a94a5d1..8453211f0671c 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs
@@ -30,30 +30,54 @@ internal IntPtr Handle
             }
         }
 
-        public void BindInput(string name, MemoryAllocation allocation)
+        public void BindInput(string name, Tensors.TensorElementType elementType, long[] shape, MemoryAllocation allocation)
         {
+            using (var ortValue = OrtValue.CreateTensorValueWithData(allocation.Info,
+                                                                     elementType,
+                                                                     shape,
+                                                                     allocation.Pointer, allocation.Size))
+                BindInputOrOutput(name, ortValue.Handle, true);
         }
 
-        public void BindInputs(ReadOnlySequence<string> names, ReadOnlySequence<MemoryAllocation> allocations)
+        public void BindInput(string name, FixedBufferOnnxValue fixedValue)
         {
-            if (names.IsEmpty || names.Length != allocations.Length)
+            if (fixedValue.OnnxValueType != OnnxValueType.ONNX_TYPE_TENSOR)
             {
-                throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Names and Allocations must be of equal length");
+                throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Binding works only with Tensors");
             }
+            BindInputOrOutput(name, fixedValue.Value, true);
+        }
+
+        public void BindOutput(string name, Tensors.TensorElementType elementType, long[] shape, MemoryAllocation allocation)
+        {
+            using (var ortValue = OrtValue.CreateTensorValueWithData(allocation.Info,
+                                                                     elementType,
+                                                                     shape,
+                                                                     allocation.Pointer, allocation.Size))
+                BindInputOrOutput(name, ortValue.Handle, false);
+        }
+
+        public void BindOutput(string name, FixedBufferOnnxValue fixedValue)
+        {
+            if (fixedValue.OnnxValueType != OnnxValueType.ONNX_TYPE_TENSOR)
+            {
+                throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Binding works only with Tensors");
+            }
+            BindInputOrOutput(name, fixedValue.Value, false);
+        }
 
-            using (var ortValues = new DisposableList<OrtValue>())
-            using (var pinnedNames = new DisposableList<MemoryHandle>())
+        private void BindInputOrOutput(string name, IntPtr ortValue, bool isInput)
+        {
+            var utf8_str_pinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(name), GCHandleType.Pinned);
+            using (var pinnedName = new PinnedGCHandle(utf8_str_pinned))
             {
-                for (int i = 0; i < names.Length; ++i)
+                if (isInput)
+                {
+                    NativeApiStatus.VerifySuccess(NativeMethods.OrtBindInput(_handle, pinnedName.Pointer, ortValue));
+                }
+                else
                 {
-                    var first = names.Start;
-                    ReadOnlyMemory<char> name;
-                    bool next = true;
-                    //while(next)
-                    //{
-                    //    names.TryGet(ref first, out name, true);
-                    //MemoryHandle pinned_utf8 = new Memory<byte>(Encoding.UTF8.GetBytes(string.Concat(name, '\0'))).Pin();
-                    //pinnedNames.Add(pinned_utf8);
+                    NativeApiStatus.VerifySuccess(NativeMethods.OrtBindOutput(_handle, pinnedName.Pointer, ortValue));
                 }
             }
         }
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs b/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs
index 296618d852c39..5ffe5749129fc 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs
@@ -117,7 +117,7 @@ public MemoryInfo(byte[] utf8_allocator_name, AllocatorType alloc_type, int devi
         /// Device id
         /// Memory type
         public MemoryInfo(string allocator_name, AllocatorType alloc_type, int device_id, MemoryType mem_type)
-            : this(Encoding.UTF8.GetBytes(allocator_name + '\0'), alloc_type, device_id, mem_type)
+            : this(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(allocator_name), alloc_type, device_id, mem_type)
         {
         }
 
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
index 9e7175b7aa0ef..26824d2e5f9a3 100644
---
a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -724,7 +724,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca IntPtr /*_Inout_ OrtAllocator* */ allocator, long[] /*_In_ const int64_t* */ shape, UIntPtr /*size_t*/ shape_len, - TensorElementType type, + Tensors.TensorElementType type, out IntPtr /* OrtValue** */ outputValue); public static DOrtCreateTensorAsOrtValue OrtCreateTensorAsOrtValue; @@ -734,7 +734,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca UIntPtr dataLength, long[] shape, UIntPtr shapeLength, - TensorElementType type, + Tensors.TensorElementType type, out IntPtr /* OrtValue** */ outputValue); public static DOrtCreateTensorWithDataAsOrtValue OrtCreateTensorWithDataAsOrtValue; diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs index e0595485f40d7..240717b915f43 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs @@ -8,7 +8,7 @@ using System.Runtime.CompilerServices; using System.Runtime.InteropServices; using System.Threading; - +using Microsoft.ML.OnnxRuntime.Tensors; namespace Microsoft.ML.OnnxRuntime { @@ -27,7 +27,7 @@ internal class NativeOnnxTensorMemory : MemoryManager, NativeMemoryHandler private IntPtr _onnxValueHandle; // pointer to onnxvalue object in native private IntPtr _dataBufferPointer; // pointer to mutable tensor data in native memory private string[] _dataBufferAsString; // string tensor values copied into managed memory - private TensorElementType _elementType; + private Tensors.TensorElementType _elementType; private int _elementCount; private int _elementWidth; private int[] _dimensions; @@ -157,7 +157,7 @@ public void Dispose() public int ElementWidth => _elementWidth; - public TensorElementType ElementType => _elementType; + public Tensors.TensorElementType ElementType => _elementType; public override Span GetSpan() { diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs index 010a8f07ea7d3..971cdb2f683ad 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs @@ -168,29 +168,27 @@ internal static void CreateNativeOnnxValue(Object value, out IntPtr onnxValue, o throw new NotSupportedException("The inference value " + nameof(value) + " is not of a supported type"); } - Debug.Assert(dataBufferPointer != IntPtr.Zero, "dataBufferPointer must be non-null after obtaining the pinned buffer"); + try + { + Debug.Assert(dataBufferPointer != IntPtr.Zero, "dataBufferPointer must be non-null after obtaining the pinned buffer"); - onnxValueType = OnnxValueType.ONNX_TYPE_TENSOR; // set onnx value type to tensor + onnxValueType = OnnxValueType.ONNX_TYPE_TENSOR; // set onnx value type to tensor - // copy to an ulong[] shape to match size_t[] - long[] longShape = new long[rank]; - for (int i = 0; i < rank; i++) - { - longShape[i] = shape[i]; - } + // copy to an ulong[] shape to match int64_t[] + long[] longShape = new long[rank]; + for (int i = 0; i < rank; i++) + { + longShape[i] = shape[i]; + } - IntPtr status = NativeMethods.OrtCreateTensorWithDataAsOrtValue( + NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateTensorWithDataAsOrtValue( MemoryInfo.DefaultInstance.Pointer, 
dataBufferPointer, (UIntPtr)(dataBufferLength), longShape, (UIntPtr)rank, elementType, - out onnxValue - ); - try - { - NativeApiStatus.VerifySuccess(status); + out onnxValue)); } catch (OnnxRuntimeException e) { @@ -230,28 +228,18 @@ out nativeTensor // fill the native tensor, using GetValue(index) from the Tensor var len = tensorValue.Length; var stringsInTensor = new IntPtr[len]; - var pinnedHandles = new GCHandle[len + 1]; - pinnedHandles[len] = GCHandle.Alloc(stringsInTensor, GCHandleType.Pinned); - try + using(var pinnedHandles = new DisposableList((int)len)) { for (int i = 0; i < len; i++) { - var utf8str = UTF8Encoding.UTF8.GetBytes(tensorValue.GetValue(i) + "\0"); - pinnedHandles[i] = GCHandle.Alloc(utf8str, GCHandleType.Pinned); - stringsInTensor[i] = pinnedHandles[i].AddrOfPinnedObject(); + var utf8str = StringToZeroTerminatedUtf8(tensorValue.GetValue(i)); + var gcHandle = GCHandle.Alloc(utf8str, GCHandleType.Pinned); + stringsInTensor[i] = gcHandle.AddrOfPinnedObject(); + pinnedHandles.Add(new PinnedGCHandle(gcHandle)); } - NativeApiStatus.VerifySuccess(NativeMethods.OrtFillStringTensor(nativeTensor, stringsInTensor, (UIntPtr)len)); - } - finally - { - foreach (var handle in pinnedHandles) - { - if (handle.IsAllocated) - { - handle.Free(); - } - } + using (var pinnedStrings = new PinnedGCHandle(GCHandle.Alloc(stringsInTensor, GCHandleType.Pinned))) + NativeApiStatus.VerifySuccess(NativeMethods.OrtFillStringTensor(nativeTensor, stringsInTensor, (UIntPtr)len)); } } catch (OnnxRuntimeException e) @@ -389,6 +377,23 @@ private static bool TryPinAsTensor( return false; } + /// + /// Converts C# UTF-16 string to UTF-8 zero terminated + /// byte[] instance + /// + /// string to be converted + /// UTF-8 encoded equivalent + internal static byte[] StringToZeroTerminatedUtf8(string s) + { + return UTF8Encoding.UTF8.GetBytes(s + '\0'); + } + + /// + /// Reads UTF-8 encode string from a C zero terminated string + /// and converts it into a C# UTF-16 encoded string + /// + /// pointer to native or pinned memory where Utf-8 resides + /// internal static string StringFromNativeUtf8(IntPtr nativeUtf8) { // .NET 5.0 has Marshal.PtrToStringUTF8 that does the below @@ -400,27 +405,6 @@ internal static string StringFromNativeUtf8(IntPtr nativeUtf8) } } - internal enum TensorElementType - { - Float = 1, - UInt8 = 2, - Int8 = 3, - UInt16 = 4, - Int16 = 5, - Int32 = 6, - Int64 = 7, - String = 8, - Bool = 9, - Float16 = 10, - Double = 11, - UInt32 = 12, - UInt64 = 13, - Complex64 = 14, - Complex128 = 15, - BFloat16 = 16, - DataTypeMax = 17 - } - public enum OnnxValueType { ONNX_TYPE_UNKNOWN = 0, diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs index 8acad3a8e4de5..b6523cb4b3c79 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs @@ -1,4 +1,5 @@ -using System; +using Microsoft.ML.OnnxRuntime.Tensors; +using System; using System.Collections.Generic; using System.Text; @@ -13,11 +14,54 @@ internal class OrtValue : IDisposable { private IntPtr _handle; + /// + /// Used by factory methods to instantiate + /// + /// internal OrtValue(IntPtr handle) { _handle = handle; } + /// + /// Factory method to construct an OrtValue of Tensor type on top of pre-allocated memory. + /// This can be a piece of native memory allocated by MemoryAllocator (possibly on a device) + /// or a piece of pinned managed memory. 
+ ///
+ /// The resulting OrtValue does not own the underlying memory buffer and will not attempt to
+ /// deallocate it.
+ ///
+ /// Memory Info. For managed memory it is a default cpu.
+ /// For Native memory must be obtained from the allocator or MemoryAllocation instance
+ /// DataType for the Tensor
+ /// Tensor shape
+ /// Pointer to a raw memory buffer
+ /// Buffer length in bytes
+ /// A disposable instance of OrtValue
+ public static OrtValue CreateTensorValueWithData(MemoryInfo memInfo, TensorElementType elementType,
+ long[] shape,
+ IntPtr dataBuffer,
+ uint bufferLength)
+ {
+ var shapeSize = ArrayUtilities.GetSizeForShape(shape);
+ if (shapeSize > bufferLength)
+ {
+ throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Cannot bind the shape to a smaller buffer");
+ }
+
+ IntPtr ortValueHandle = IntPtr.Zero;
+ NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateTensorWithDataAsOrtValue(
+ memInfo.Pointer,
+ dataBuffer,
+ (UIntPtr)bufferLength,
+ shape,
+ (UIntPtr)shape.Length,
+ elementType,
+ out ortValueHandle
+ ));
+ return new OrtValue(ortValueHandle);
+ }
+
 internal IntPtr Handle
 {
 get
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/ArrayUtilities.cs b/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/ArrayUtilities.cs
index 2913799968930..827b838dd5276 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/ArrayUtilities.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/ArrayUtilities.cs
@@ -20,6 +20,20 @@ internal static class ArrayUtilities
 {
 public const int StackallocMax = 16;

+ public static long GetSizeForShape(long[] shape)
+ {
+ long product = 1;
+ foreach (var dim in shape)
+ {
+ if (dim < 0)
+ {
+ throw new ArgumentOutOfRangeException("Shape must not have negative elements:" + dim);
+ }
+ product *= dim;
+ }
+ return product;
+ }
+
 public static long GetProduct(ReadOnlySpan<int> dimensions, int startIndex = 0)
 {
 if (dimensions.Length == 0)
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/Tensor.cs b/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/Tensor.cs
index 7b522c4f852ae..09d4634ab636e 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/Tensor.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/Tensor.cs
@@ -29,6 +29,30 @@ namespace Microsoft.ML.OnnxRuntime.Tensors
 {
+ ///
+ /// Supported Tensor DataType
+ ///
+ public enum TensorElementType
+ {
+ Float = 1,
+ UInt8 = 2,
+ Int8 = 3,
+ UInt16 = 4,
+ Int16 = 5,
+ Int32 = 6,
+ Int64 = 7,
+ String = 8,
+ Bool = 9,
+ Float16 = 10,
+ Double = 11,
+ UInt32 = 12,
+ UInt64 = 13,
+ Complex64 = 14,
+ Complex128 = 15,
+ BFloat16 = 16,
+ DataTypeMax = 17
+ }
+
 ///
 /// Various methods for creating and manipulating Tensor<T>
 ///
From 80a736115eab2d3eb4453b6963830e115bccc133 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov
Date: Fri, 17 Jul 2020 15:30:34 -0700
Subject: [PATCH 14/39] Adjust for the recent changes in the Api global instance.
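
Call sites no longer reference the Ort::Global<void>::api_ member directly and
instead go through the GetApi() accessor. A representative before/after of one
call site (illustrative only; the complete list of touched call sites is in the
diff below):

    // before: direct reference to the global OrtApi instance
    ThrowOnError(Global<void>::api_.CreateIoBinding(session, &p_));
    // after: the accessor hides how the instance is obtained
    ThrowOnError(GetApi().CreateIoBinding(session, &p_));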
--- .../core/session/onnxruntime_cxx_inline.h | 20 +++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index aef54e26b03db..efd7580cafc6a 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -79,44 +79,44 @@ inline MemoryInfo::MemoryInfo(const char* name, OrtAllocatorType type, int id, O } inline Allocator::Allocator(const Session& sess, const MemoryInfo& mem_info) { - ThrowOnError(Global::api_.CreateAllocator(sess.operator const OrtSession*(), + ThrowOnError(GetApi().CreateAllocator(sess.operator const OrtSession*(), mem_info.operator const OrtMemoryInfo*(), &p_)); } inline void* Allocator::Alloc(size_t size) const { void* out = nullptr; - ThrowOnError(Global::api_.AllocatorAlloc(p_, size, &out)); + ThrowOnError(GetApi().AllocatorAlloc(p_, size, &out)); return out; } inline void Allocator::Free(void* p) const { - ThrowOnError(Global::api_.AllocatorFree(p_, p)); + ThrowOnError(GetApi().AllocatorFree(p_, p)); } inline const OrtMemoryInfo* Allocator::GetInfo() const { const OrtMemoryInfo* out = nullptr; - ThrowOnError(Global::api_.AllocatorGetInfo(p_, &out)); + ThrowOnError(GetApi().AllocatorGetInfo(p_, &out)); return out; } inline IoBinding::IoBinding(Session& session) { - ThrowOnError(Global::api_.CreateIoBinding(session, &p_)); + ThrowOnError(GetApi().CreateIoBinding(session, &p_)); } inline void IoBinding::BindInput(const char* name, const Value& value) { - ThrowOnError(Global::api_.BindInput(p_, name, value)); + ThrowOnError(GetApi().BindInput(p_, name, value)); } inline void IoBinding::BindOutput(const char* name, const Value& value) { - ThrowOnError(Global::api_.BindOutput(p_, name, value)); + ThrowOnError(GetApi().BindOutput(p_, name, value)); } inline void IoBinding::ClearBoundInputs() { - Global::api_.ClearBoundInputs(p_); + GetApi().ClearBoundInputs(p_); } inline void IoBinding::ClearBoundOutputs() { - Global::api_.ClearBoundOutputs(p_); + GetApi().ClearBoundOutputs(p_); } inline Env::Env(OrtLoggingLevel default_warning_level, _In_ const char* logid) { @@ -290,7 +290,7 @@ inline void Session::Run(const RunOptions& run_options, const char* const* input } inline void Session::Run(const RunOptions& run_options, const IoBinding& io_binding) { - ThrowOnError(Global::api_.RunWithBinding(p_, run_options, io_binding)); + ThrowOnError(GetApi().RunWithBinding(p_, run_options, io_binding)); } inline size_t Session::GetInputCount() const { From 29c1b8eced5091058112e48df234a33e0206cbf2 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 17 Jul 2020 16:07:07 -0700 Subject: [PATCH 15/39] Introduce BindOutputToDevice API --- .../onnxruntime/core/session/onnxruntime_c_api.h | 15 +++++++++++++++ .../core/session/onnxruntime_cxx_api.h | 1 + .../core/session/onnxruntime_cxx_inline.h | 4 ++++ onnxruntime/core/session/onnxruntime_c_api.cc | 13 +++++++++++++ onnxruntime/core/session/ort_apis.h | 1 + 5 files changed, 34 insertions(+) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index e354ddc6c38d1..c5bf998cafb3f 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -851,9 +851,11 @@ struct OrtApi { * \return OrtStatus instance on error which the caller is responsible to free or nullptr on success */ 
ORT_API2_STATUS(BindInput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr); + /** * The function will bind the OrtValue to the specified output name. * The OrtValue must be a Tensor. ORT would use that value in place of output for the specified name. + * * \param binding_ptr - an instance of OrtIoBinding created by CreateIoBinding() * \param name - name for the model output * \param val_ptr - OrtValue of Tensor type. @@ -861,6 +863,19 @@ struct OrtApi { */ ORT_API2_STATUS(BindOutput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr); + /** + * The function will bind the OrtValue to a device which specification is contained within OrtMemoryInfo + * You can either create an instance ofr OrtMemoryInfo or obtain one from the allocator that you are using. + * This is useful when one or more outputs have dynamic shapes and, it is hard to create an output OrtValue with + * a Tensor ahead of time. + * + * \param binding_ptr - an instance of OrtIoBinding created by CreateIoBinding() + * \param name - name for the model output + * \param mem_info_ptr - OrtMemoryInfo + * \return OrtStatus instance on error which the caller is responsible to free or nullptr on success + */ + ORT_API2_STATUS(BindOutputToDevice, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtMemoryInfo* val_ptr); + /** Clears any previously specified bindings for inputs/outputs */ void(ORT_API_CALL* ClearBoundInputs)(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION ORT_ALL_ARGS_NONNULL; diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 5d02ca8c08525..576ff82db07a8 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -335,6 +335,7 @@ struct IoBinding : public Base { explicit IoBinding(Session& session); void BindInput(const char* name, const Value&); void BindOutput(const char* name, const Value&); + void BindOutput(const char* name, const MemoryInfo&); void ClearBoundInputs(); void ClearBoundOutputs(); }; diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index efd7580cafc6a..ce5521dc34cfc 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -111,6 +111,10 @@ inline void IoBinding::BindOutput(const char* name, const Value& value) { ThrowOnError(GetApi().BindOutput(p_, name, value)); } +inline void IoBinding::BindOutput(const char* name, const MemoryInfo& mem_info) { + ThrowOnError(GetApi().BindOutputToDevice(p_, name, mem_info)); +} + inline void IoBinding::ClearBoundInputs() { GetApi().ClearBoundInputs(p_); } diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 59fc493510555..89888d6695e00 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -4,6 +4,7 @@ #include "core/session/onnxruntime_c_api.h" #include "core/session/allocator_impl.h" #include "core/session/IOBinding.h" +#include "core/framework/allocator.h" #include "core/framework/error_code_helper.h" #include "core/framework/execution_provider.h" #include "core/framework/utils.h" @@ -579,6 +580,17 @@ ORT_API_STATUS_IMPL(OrtApis::BindOutput, _Inout_ OrtIoBinding* binding_ptr, _In_ API_IMPL_END } 
+ORT_API_STATUS_IMPL(OrtApis::BindOutputToDevice, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtMemoryInfo* mem_info_ptr) {
+  API_IMPL_BEGIN
+  auto st = binding_ptr->binding_->BindOutput(name, mem_info_ptr->device);
+  if (!st.IsOK()) {
+    return ToOrtStatus(st);
+  }
+  return nullptr;
+  API_IMPL_END
+}
+
 void OrtApis::ClearBoundInputs(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION {
   binding_ptr->binding_->ClearInputs();
 }
@@ -1686,6 +1698,7 @@ static constexpr OrtApi ort_api_1_to_4 = {
     &OrtApis::ReleaseIoBinding,
     &OrtApis::BindInput,
     &OrtApis::BindOutput,
+    &OrtApis::BindOutputToDevice,
     &OrtApis::ClearBoundInputs,
     &OrtApis::ClearBoundOutputs,
diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h
index 7f29bd99d8a2e..1252eece39bf4 100644
--- a/onnxruntime/core/session/ort_apis.h
+++ b/onnxruntime/core/session/ort_apis.h
@@ -209,6 +209,7 @@ ORT_API(void, ReleaseIoBinding, _Frees_ptr_opt_ OrtIoBinding* allocator);
 ORT_API_STATUS_IMPL(BindInput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr);
 ORT_API_STATUS_IMPL(BindOutput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr);
+ORT_API_STATUS_IMPL(BindOutputToDevice, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtMemoryInfo* val_ptr);
 ORT_API(void, ClearBoundInputs, _Inout_ OrtIoBinding* binding_ptr);
 ORT_API(void, ClearBoundOutputs, _Inout_ OrtIoBinding* binding_ptr);
From a568fa4b610baa4ecc3fd68184b0a973dcc39f35 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov
Date: Thu, 23 Jul 2020 17:42:50 -0700
Subject: [PATCH 16/39] Some refactoring.
---
 .../InferenceSession.cs | 38 ++---
 .../src/Microsoft.ML.OnnxRuntime/IoBinding.cs | 12 +-
 .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 12 +-
 .../NativeOnnxValueHelper.cs | 4 +-
 .../{MemoryAllocator.cs => OrtAllocator.cs} | 138 +++++++++---------
 .../src/Microsoft.ML.OnnxRuntime/OrtValue.cs | 47 +++---
 .../InferenceTest.cs | 20 +--
 7 files changed, 142 insertions(+), 129 deletions(-)
 rename csharp/src/Microsoft.ML.OnnxRuntime/{MemoryAllocator.cs => OrtAllocator.cs} (68%)
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
index 9445a485bdf14..b92bd1834ddbb 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
@@ -700,6 +700,15 @@ public IoBinding CreateIOBinding()
 return new IoBinding(this);
 }

+ ///
+ /// Make this method return a collection of DisposableNamedOnnxValue as in other interfaces
+ /// Query names from the IoBinding object and pair them with the array of OrtValues returned
+ ///
+ /// This method will run inference and will return outputs with names for the outputs
+ /// previously bound to the ioBinding instance.
+ /// + /// RunOptions + /// IoBinding instance with bindings public void Run(RunOptions runOptions, IoBinding ioBinding) { NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle)); @@ -711,24 +720,17 @@ public void Run(RunOptions runOptions, IoBinding ioBinding) public string EndProfiling() { IntPtr nameHandle = IntPtr.Zero; - string str = null; - + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionEndProfiling(_nativeHandle, + OrtAllocator.DefaultInstance.Pointer, + out nameHandle)); try { - NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionEndProfiling(_nativeHandle, - MemoryAllocator.DefaultInstance.Pointer, - out nameHandle)); - str = NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle); + return NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle); } finally { - if (nameHandle != IntPtr.Zero) - { - MemoryAllocator.DefaultInstance.FreeMemory(nameHandle); - } + OrtAllocator.DefaultInstance.FreeMemory(nameHandle); } - - return str; } //TODO: kept internal until implemented @@ -834,7 +836,7 @@ private string GetOutputName(ulong index) NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetOutputName( _nativeHandle, (UIntPtr)index, - MemoryAllocator.DefaultInstance.Pointer, + OrtAllocator.DefaultInstance.Pointer, out nameHandle)); str = NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle); @@ -843,7 +845,7 @@ private string GetOutputName(ulong index) { if (nameHandle != IntPtr.Zero) { - MemoryAllocator.DefaultInstance.FreeMemory(nameHandle); + OrtAllocator.DefaultInstance.FreeMemory(nameHandle); } } @@ -860,7 +862,7 @@ private string GetInputName(ulong index) NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetInputName( _nativeHandle, (UIntPtr)index, - MemoryAllocator.DefaultInstance.Pointer, + OrtAllocator.DefaultInstance.Pointer, out nameHandle)); str = NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle); @@ -869,7 +871,7 @@ private string GetInputName(ulong index) { if (nameHandle != IntPtr.Zero) { - MemoryAllocator.DefaultInstance.FreeMemory(nameHandle); + OrtAllocator.DefaultInstance.FreeMemory(nameHandle); } } return str; @@ -885,7 +887,7 @@ private string GetOverridableInitializerName(ulong index) NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetOverridableInitializerName( _nativeHandle, (UIntPtr)index, - MemoryAllocator.DefaultInstance.Pointer, + OrtAllocator.DefaultInstance.Pointer, out nameHandle)); str = NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle); @@ -894,7 +896,7 @@ private string GetOverridableInitializerName(ulong index) { if (nameHandle != IntPtr.Zero) { - MemoryAllocator.DefaultInstance.FreeMemory(nameHandle); + OrtAllocator.DefaultInstance.FreeMemory(nameHandle); } } return str; diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs b/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs index 8453211f0671c..a0264d7cb3488 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs @@ -17,6 +17,10 @@ public class IoBinding : IDisposable { private IntPtr _handle; + /// + /// Use InferenceSession.CreateIOBinding() + /// + /// internal IoBinding(InferenceSession session) { NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateIoBinding(session.Handle, out _handle)); @@ -30,7 +34,7 @@ internal IntPtr Handle } } - public void BindInput(string name, Tensors.TensorElementType elementType, long[] shape, MemoryAllocation allocation) + public void BindInput(string name, Tensors.TensorElementType elementType, long[] shape, 
OrtMemoryAllocation allocation) { using (var ortValue = OrtValue.CreateTensorValueWithData(allocation.Info, elementType, @@ -48,7 +52,7 @@ public void BindInput(string name, FixedBufferOnnxValue fixedValue) BindIntputOrOutput(name, fixedValue.Value, true); } - public void BindOutput(string name, Tensors.TensorElementType elementType, long[] shape, MemoryAllocation allocation) + public void BindOutput(string name, Tensors.TensorElementType elementType, long[] shape, OrtMemoryAllocation allocation) { using (var ortValue = OrtValue.CreateTensorValueWithData(allocation.Info, elementType, @@ -68,8 +72,8 @@ public void BindOutput(string name, FixedBufferOnnxValue fixedValue) private void BindIntputOrOutput(string name, IntPtr ortValue, bool isInput) { - var utf8_str_pinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(name), GCHandleType.Pinned); - using (var pinnedName = new PinnedGCHandle(utf8_str_pinned)) + var utf8NamePinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(name), GCHandleType.Pinned); + using (var pinnedName = new PinnedGCHandle(utf8NamePinned)) { if (isInput) { diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index 26824d2e5f9a3..cf8e95ca32344 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -556,16 +556,16 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca public delegate IntPtr /* (OrtStatus*)*/ DOrtCreateMemoryInfo( IntPtr /*(const char*) */name, - AllocatorType allocatorType, + OrtAllocatorType allocatorType, int identifier, - MemoryType memType, + OrtMemType memType, out IntPtr /*(OrtMemoryInfo*)*/ allocatorInfo // memory ownership transfered to caller ); public static DOrtCreateMemoryInfo OrtCreateMemoryInfo; public delegate IntPtr /* (OrtStatus*)*/ DOrtCreateCpuMemoryInfo( - AllocatorType allocatorType, - MemoryType memoryType, + OrtAllocatorType allocatorType, + OrtMemType memoryType, out IntPtr /*(OrtMemoryInfo*)*/ allocatorInfo ); public static DOrtCreateCpuMemoryInfo OrtCreateCpuMemoryInfo; @@ -589,12 +589,12 @@ IntPtr[] outputValues /* An array of output value pointers. 
Array must be alloca public delegate IntPtr /*(OrtStatus*)*/ DOrtMemoryInfoGetMemType( IntPtr /*(const OrtMemoryInfo* ptr)*/ mem_info, - out MemoryType /*(OrtMemType*)*/ mem_type); + out OrtMemType /*(OrtMemType*)*/ mem_type); public static DOrtMemoryInfoGetMemType OrtMemoryInfoGetMemType; public delegate IntPtr /*(OrtStatus*)*/ DOrtMemoryInfoGetType( IntPtr /*(const OrtMemoryInfo* ptr)*/ mem_info, - out AllocatorType /*(OrtAllocatorType*)*/ alloc_type + out OrtAllocatorType /*(OrtAllocatorType*)*/ alloc_type ); public static DOrtMemoryInfoGetType OrtMemoryInfoGetType; diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs index 971cdb2f683ad..08658c647dd44 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs @@ -182,7 +182,7 @@ internal static void CreateNativeOnnxValue(Object value, out IntPtr onnxValue, o } NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateTensorWithDataAsOrtValue( - MemoryInfo.DefaultInstance.Pointer, + OrtMemoryInfo.DefaultInstance.Pointer, dataBufferPointer, (UIntPtr)(dataBufferLength), longShape, @@ -218,7 +218,7 @@ internal static void CreateNativeOnnxValue(Object value, out IntPtr onnxValue, o try { NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateTensorAsOrtValue( - MemoryAllocator.DefaultInstance.Pointer, + OrtAllocator.DefaultInstance.Pointer, longShape, (UIntPtr)(longShape.Length), TensorElementType.String, diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs similarity index 68% rename from csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs rename to csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs index 39106bb53140c..6bec88fac9896 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/MemoryAllocator.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs @@ -10,7 +10,7 @@ namespace Microsoft.ML.OnnxRuntime /// /// See documentation for OrtAllocatorType in C API /// - public enum AllocatorType + public enum OrtAllocatorType { DeviceAllocator = 0, ArenaAllocator = 1 @@ -19,7 +19,7 @@ public enum AllocatorType /// /// See documentation for OrtMemType in C API /// - public enum MemoryType + public enum OrtMemType { CpuInput = -2, // Any CPU memory used by non-CPU execution provider CpuOutput = -1, // CPU accessible memory outputted by non-CPU execution provider, i.e. CUDA_PINNED @@ -29,29 +29,29 @@ public enum MemoryType /// /// This class encapsulates and most of the time owns the underlying native OrtMemoryInfo instance. - /// Instance returned from MemoryAllocator will not own OrtMemoryInfo, the class must be disposed + /// Instance returned from OrtAllocator will not own OrtMemoryInfo, the class must be disposed /// regardless. /// - /// Use this class to query and create MemoryAllocator instances so you can pre-allocate memory for model + /// Use this class to query and create OrtAllocator instances so you can pre-allocate memory for model /// inputs/outputs and use it for binding. Instances of the class can also used to created OrtValues bound - /// to pre-allocated memory. In that case, the instance of MemoryInfo contains the information about the allocator + /// to pre-allocated memory. In that case, the instance of OrtMemoryInfo contains the information about the allocator /// used to allocate the underlying memory. 
/// - public class MemoryInfo : IDisposable + public class OrtMemoryInfo : IDisposable { - private static readonly Lazy _defaultCpuAllocInfo = new Lazy(CreateCpuMemoryInfo); + private static readonly Lazy _defaultCpuAllocInfo = new Lazy(CreateCpuMemoryInfo); private IntPtr _pointer; private readonly bool _owned; // false if we are exposing OrtMemoryInfo from an allocator which owns it - private static MemoryInfo CreateCpuMemoryInfo() + private static OrtMemoryInfo CreateCpuMemoryInfo() { - IntPtr allocInfo = IntPtr.Zero; + IntPtr memoryInfo = IntPtr.Zero; // Returns OrtMemoryInfo instance that needs to be disposed - NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateCpuMemoryInfo(AllocatorType.DeviceAllocator, MemoryType.Cpu, out allocInfo)); - return new MemoryInfo(allocInfo, true); + NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateCpuMemoryInfo(OrtAllocatorType.DeviceAllocator, OrtMemType.Cpu, out memoryInfo)); + return new OrtMemoryInfo(memoryInfo, true); } - public static MemoryInfo DefaultInstance + public static OrtMemoryInfo DefaultInstance { get { @@ -69,55 +69,55 @@ internal IntPtr Pointer /// /// This allocator takes an native pointer to already existing - /// instance of MemoryInfo. That instance may either be owned or not + /// instance of OrtMemoryInfo. That instance may either be owned or not /// owned. In the latter case, this class serves to expose native properties /// of the instance. /// /// - internal MemoryInfo(IntPtr allocInfo, bool owned) + internal OrtMemoryInfo(IntPtr allocInfo, bool owned) { _pointer = allocInfo; _owned = owned; } // Predefined utf8 encoded allocator names. Use them to construct an instance of - // MemoryInfo - public static readonly byte[] CPU_allocator = Encoding.UTF8.GetBytes("Cpu" + '\0'); - public static readonly byte[] CUDA_allocator = Encoding.UTF8.GetBytes("Cuda" + '\0'); - public static readonly byte[] CUDA_PINNED_allocator = Encoding.UTF8.GetBytes("CudaPinned" + '\0'); + // OrtMemoryInfo + public static readonly byte[] allocatorCPU = Encoding.UTF8.GetBytes("Cpu" + '\0'); + public static readonly byte[] allocatorCUDA = Encoding.UTF8.GetBytes("Cuda" + '\0'); + public static readonly byte[] allocatorCUDA_PINNED = Encoding.UTF8.GetBytes("CudaPinned" + '\0'); /// - /// Create an instance of MemoryInfo according to the specification + /// Create an instance of OrtMemoryInfo according to the specification /// Memory info instances are usually used to get a handle of a native allocator /// that is present within the current inference session object. That, in turn, depends /// of what execution providers are available within the binary that you are using and are /// registered with Add methods. /// - /// Allocator name. Use of the predefined above. - /// Allocator type - /// Device id - /// Memory type - public MemoryInfo(byte[] utf8_allocator_name, AllocatorType alloc_type, int device_id, MemoryType mem_type) + /// Allocator name. Use of the predefined above. 
+ /// Allocator type + /// Device id + /// Memory type + public OrtMemoryInfo(byte[] utf8AllocatorName, OrtAllocatorType allocatorType, int deviceId, OrtMemType memoryType) { - using (var pinned_handle = new PinnedGCHandle(GCHandle.Alloc(utf8_allocator_name, GCHandleType.Pinned))) + using (var pinnedName = new PinnedGCHandle(GCHandle.Alloc(utf8AllocatorName, GCHandleType.Pinned))) { - NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateMemoryInfo(pinned_handle.Pointer, - alloc_type, - device_id, - mem_type, + NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateMemoryInfo(pinnedName.Pointer, + allocatorType, + deviceId, + memoryType, out _pointer)); } _owned = true; } /// - /// Create an instance of MemoryInfo according to the specification. + /// Create an instance of OrtMemoryInfo according to the specification. /// - /// Allocator name - /// Allocator type - /// Device id - /// Memory type - public MemoryInfo(string allocator_name, AllocatorType alloc_type, int device_id, MemoryType mem_type) - : this(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(allocator_name), alloc_type, device_id, mem_type) + /// Allocator name + /// Allocator type + /// Device id + /// Memory type + public OrtMemoryInfo(string allocatorName, OrtAllocatorType allocatorType, int deviceId, OrtMemType memoryType) + : this(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(allocatorName), allocatorType, deviceId, memoryType) { } @@ -125,9 +125,9 @@ public string Name { get { - IntPtr utf8_name = IntPtr.Zero; - NativeApiStatus.VerifySuccess(NativeMethods.OrtMemoryInfoGetName(_pointer, out utf8_name)); - return NativeOnnxValueHelper.StringFromNativeUtf8(utf8_name); + IntPtr utf8Name = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtMemoryInfoGetName(_pointer, out utf8Name)); + return NativeOnnxValueHelper.StringFromNativeUtf8(utf8Name); } } @@ -147,21 +147,21 @@ public int Id /// calls behind them so exposing them as Get() would be appropriate. /// /// - public MemoryType GetMemoryType() + public OrtMemType GetMemoryType() { - MemoryType mem_type = MemoryType.Default; - NativeApiStatus.VerifySuccess(NativeMethods.OrtMemoryInfoGetMemType(_pointer, out mem_type)); - return mem_type; + OrtMemType memoryType = OrtMemType.Default; + NativeApiStatus.VerifySuccess(NativeMethods.OrtMemoryInfoGetMemType(_pointer, out memoryType)); + return memoryType; } - public AllocatorType GetAllocatorType() + public OrtAllocatorType GetAllocatorType() { - AllocatorType alloc_type = AllocatorType.ArenaAllocator; - NativeApiStatus.VerifySuccess(NativeMethods.OrtMemoryInfoGetType(_pointer, out alloc_type)); - return alloc_type; + OrtAllocatorType allocatorType = OrtAllocatorType.ArenaAllocator; + NativeApiStatus.VerifySuccess(NativeMethods.OrtMemoryInfoGetType(_pointer, out allocatorType)); + return allocatorType; } - public bool CompareMemoryInfo(MemoryInfo other) + public bool CompareMemoryInfo(OrtMemoryInfo other) { int result = -1; NativeApiStatus.VerifySuccess(NativeMethods.OrtCompareMemoryInfo(_pointer, other._pointer, out result)); @@ -191,15 +191,15 @@ public void Dispose() /// /// This class represents memory allocation made by a specific onnxruntime - /// allocator. Use MemoryAllocator.Allocate() to obtain an instance of this class. + /// allocator. Use OrtAllocator.Allocate() to obtain an instance of this class. /// It implements IDisposable and makes use of the original allocator /// used to allocate the memory. The lifespan of the allocator instance must eclipse the - /// lifespan of the allocation. 
Or, if you prefer, all MemoryAllocation instances must be + /// lifespan of the allocation. Or, if you prefer, all OrtMemoryAllocation instances must be /// disposed of before the corresponding allocator instances are disposed of. /// - public class MemoryAllocation : IDisposable + public class OrtMemoryAllocation : IDisposable { - private MemoryAllocator _allocator; + private OrtAllocator _allocator; /// /// Bind an arbitrary piece of native memory to the instance @@ -207,7 +207,7 @@ public class MemoryAllocation : IDisposable /// /// /// - public MemoryAllocation(IntPtr pointer, uint size) + public OrtMemoryAllocation(IntPtr pointer, uint size) { _allocator = null; Pointer = pointer; @@ -216,14 +216,14 @@ public MemoryAllocation(IntPtr pointer, uint size) /// /// This an instance with a piece of memory allocated - /// by onnxruntime MemoryAllocator. The same allocator will be used for + /// by onnxruntime OrtAllocator. The same allocator will be used for /// for memory disposal. For memory allocated elsewhere, the instance will not own the memory /// and will not dispose of it. /// /// /// /// - internal MemoryAllocation(MemoryAllocator allocator, IntPtr pointer, uint size) + internal OrtMemoryAllocation(OrtAllocator allocator, IntPtr pointer, uint size) { _allocator = allocator; Pointer = pointer; @@ -240,7 +240,7 @@ internal MemoryAllocation(MemoryAllocator allocator, IntPtr pointer, uint size) /// public uint Size { get; private set; } - public MemoryInfo Info + public OrtMemoryInfo Info { get { @@ -268,22 +268,22 @@ public void Dispose() #endregion } - public class MemoryAllocator : IDisposable + public class OrtAllocator : IDisposable { - private static readonly Lazy _defaultInstance = new Lazy(GetDefaultCpuAllocator); + private static readonly Lazy _defaultInstance = new Lazy(GetDefaultCpuAllocator); private IntPtr _pointer; private readonly bool _owned; - private static MemoryAllocator GetDefaultCpuAllocator() + private static OrtAllocator GetDefaultCpuAllocator() { IntPtr allocator = IntPtr.Zero; NativeApiStatus.VerifySuccess(NativeMethods.OrtGetAllocatorWithDefaultOptions(out allocator)); // Instance of default cpu allocator is a native singleton // Do not dispose of - return new MemoryAllocator(allocator, false); + return new OrtAllocator(allocator, false); } - public static MemoryAllocator DefaultInstance // May throw exception in every access, if the constructor have thrown an exception + public static OrtAllocator DefaultInstance // May throw exception in every access, if the constructor have thrown an exception { get { @@ -304,26 +304,26 @@ internal IntPtr Pointer /// /// /// - internal MemoryAllocator(IntPtr allocator, bool owned) + internal OrtAllocator(IntPtr allocator, bool owned) { this._pointer = allocator; this._owned = owned; } - public MemoryAllocator(InferenceSession session, MemoryInfo memInfo) + public OrtAllocator(InferenceSession session, OrtMemoryInfo memInfo) { NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateAllocator(session.Handle, memInfo.Pointer, out _pointer)); _owned = true; } - public MemoryInfo Info + public OrtMemoryInfo Info { get { - IntPtr mem_info = IntPtr.Zero; - NativeApiStatus.VerifySuccess(NativeMethods.OrtAllocatorGetInfo(_pointer, out mem_info)); + IntPtr memoryInfo = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtAllocatorGetInfo(_pointer, out memoryInfo)); // This serves as an exposure of memory_info owned by the allocator - return new MemoryInfo(mem_info, false); + return new OrtMemoryInfo(memoryInfo, false); } } @@ 
-332,11 +332,11 @@ public MemoryInfo Info
 ///
 ///
 ///
- public MemoryAllocation Allocate(uint size)
+ public OrtMemoryAllocation Allocate(uint size)
 {
 IntPtr allocation = IntPtr.Zero;
 NativeApiStatus.VerifySuccess(NativeMethods.OrtAllocatorAlloc(_pointer, (UIntPtr)size, out allocation));
- return new MemoryAllocation(this, allocation, size);
+ return new OrtMemoryAllocation(this, allocation, size);
 }

 ///
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs
index b6523cb4b3c79..e28be06f533fe 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs
@@ -7,38 +7,48 @@ namespace Microsoft.ML.OnnxRuntime
 {
 ///
 /// Represents a disposable OrtValue
- /// If necessary maybe made public and more
- /// functionality added. Right now it is disposable
 ///
- internal class OrtValue : IDisposable
+ public class OrtValue : IDisposable
 {
- private IntPtr _handle;
-
 ///
- /// Used by factory methods to instantiate
+ /// Use factory methods to instantiate
 ///
 ///
 internal OrtValue(IntPtr handle)
 {
- _handle = handle;
+ Handle = handle;
+ }
+
+ internal IntPtr Handle { get; private set; }
+
+ ///
+ /// This internal method is used to transfer ownership to the
+ /// DisposableNamedOnnxValue class.
+ ///
+ ///
+ internal IntPtr TakeOwnership()
+ {
+ var handle = Handle;
+ Handle = IntPtr.Zero;
+ return handle;
 }

 ///
 /// Factory method to construct an OrtValue of Tensor type on top of pre-allocated memory.
- /// This can be a piece of native memory allocated by MemoryAllocator (possibly on a device)
+ /// This can be a piece of native memory allocated by OrtAllocator (possibly on a device)
 /// or a piece of pinned managed memory.
 ///
 /// The resulting OrtValue does not own the underlying memory buffer and will not attempt to
 /// deallocate it.
 ///
 /// Memory Info. For managed memory it is a default cpu.
- /// For Native memory must be obtained from the allocator or MemoryAllocation instance + /// For Native memory must be obtained from the allocator or OrtMemoryAllocation instance /// DataType for the Tensor /// Tensor shape /// Pointer to a raw memory buffer /// Buffer length in bytes /// A disposable instance of OrtValue - public static OrtValue CreateTensorValueWithData(MemoryInfo memInfo, TensorElementType elementType, + public static OrtValue CreateTensorValueWithData(OrtMemoryInfo memInfo, TensorElementType elementType, long[] shape, IntPtr dataBuffer, uint bufferLength) @@ -62,21 +72,18 @@ out ortValueHandle return new OrtValue(ortValueHandle); } - internal IntPtr Handle - { - get - { - return _handle; - } - } - #region Disposable Support protected virtual void Dispose(bool disposing) { if (disposing) { - NativeMethods.OrtReleaseValue(_handle); - _handle = IntPtr.Zero; + // We have to surrender ownership to some legacy classes + if (Handle != IntPtr.Zero) + { + NativeMethods.OrtReleaseValue(Handle); + // Prevent use after disposal + Handle = IntPtr.Zero; + } } } public void Dispose() diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index d3a7cfa860fd6..3ae109c1acfa6 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -1660,22 +1660,22 @@ private void TestInferenceSessionWithByteArray() void TestCPUAllocator(InferenceSession session) { int device_id = 0; - using (var info_cpu = new MemoryInfo(MemoryInfo.CPU_allocator, AllocatorType.ArenaAllocator, device_id, MemoryType.Default)) + using (var info_cpu = new OrtMemoryInfo(OrtMemoryInfo.allocatorCPU, OrtAllocatorType.ArenaAllocator, device_id, OrtMemType.Default)) { Assert.NotEqual(info_cpu.Pointer, IntPtr.Zero); Assert.Equal("Cpu", info_cpu.Name); Assert.Equal(device_id, info_cpu.Id); - Assert.Equal(AllocatorType.ArenaAllocator, info_cpu.GetAllocatorType()); - Assert.Equal(MemoryType.Default, info_cpu.GetMemoryType()); + Assert.Equal(OrtAllocatorType.ArenaAllocator, info_cpu.GetAllocatorType()); + Assert.Equal(OrtMemType.Default, info_cpu.GetMemoryType()); - using (var allocator = new MemoryAllocator(session, info_cpu)) + using (var allocator = new OrtAllocator(session, info_cpu)) { Assert.NotEqual(allocator.Pointer, IntPtr.Zero); var alloc_info = allocator.Info; Assert.True(info_cpu.CompareMemoryInfo(alloc_info)); uint size = 1024; - MemoryAllocation chunk = allocator.Allocate(size); + OrtMemoryAllocation chunk = allocator.Allocate(size); Assert.NotEqual(chunk.Pointer, IntPtr.Zero); Assert.Equal(chunk.Size, size); Assert.True(chunk.Info.CompareMemoryInfo(alloc_info)); @@ -1689,22 +1689,22 @@ void TestCPUAllocator(InferenceSession session) void TestCUDAAllocator(InferenceSession session) { int device_id = 0; - using (var info_cuda = new MemoryInfo(MemoryInfo.CUDA_allocator, AllocatorType.ArenaAllocator, device_id, MemoryType.Default)) + using (var info_cuda = new OrtMemoryInfo(OrtMemoryInfo.allocatorCUDA, OrtAllocatorType.ArenaAllocator, device_id, OrtMemType.Default)) { Assert.NotEqual(info_cuda.Pointer, IntPtr.Zero); Assert.Equal("Cuda", info_cuda.Name); Assert.Equal(device_id, info_cuda.Id); - Assert.Equal(AllocatorType.ArenaAllocator, info_cuda.GetAllocatorType()); - Assert.Equal(MemoryType.Default, info_cuda.GetMemoryType()); + Assert.Equal(OrtAllocatorType.ArenaAllocator, info_cuda.GetAllocatorType()); + Assert.Equal(OrtMemType.Default, 
info_cuda.GetMemoryType());

- using (var allocator = new MemoryAllocator(session, info_cuda))
+ using (var allocator = new OrtAllocator(session, info_cuda))
 {
 Assert.NotEqual(allocator.Pointer, IntPtr.Zero);
 var alloc_info = allocator.Info;
 Assert.True(info_cuda.CompareMemoryInfo(alloc_info));
 uint size = 1024;
- MemoryAllocation chunk = allocator.Allocate(size);
+ OrtMemoryAllocation chunk = allocator.Allocate(size);
 Assert.NotEqual(chunk.Pointer, IntPtr.Zero);
 Assert.Equal(chunk.Size, size);
 Assert.True(chunk.Info.CompareMemoryInfo(alloc_info));
From eea3851ade900d9e3948419e0d4be9bb8d429501 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov
Date: Fri, 24 Jul 2020 16:57:33 -0700
Subject: [PATCH 17/39] Implement GetBoundOutputNames and test it.
---
 .../InferenceSession.cs | 9 ++--
 .../Microsoft.ML.OnnxRuntime/OrtAllocator.cs | 3 +-
 .../core/session/onnxruntime_c_api.h | 25 +++++++--
 .../core/session/onnxruntime_cxx_api.h | 5 ++
 .../core/session/onnxruntime_cxx_inline.h | 36 +++++++++++++
 onnxruntime/core/session/inference_session.h | 7 +--
 onnxruntime/core/session/onnxruntime_c_api.cc | 52 +++++++++++++++++--
 onnxruntime/core/session/ort_apis.h | 2 +
 onnxruntime/test/shared_lib/test_inference.cc | 8 ++-
 9 files changed, 129 insertions(+), 18 deletions(-)
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
index b92bd1834ddbb..47107fe6a44fb 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
@@ -720,17 +720,14 @@ public void Run(RunOptions runOptions, IoBinding ioBinding)
 public string EndProfiling()
 {
 IntPtr nameHandle = IntPtr.Zero;
+ var allocator = OrtAllocator.DefaultInstance;
 NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionEndProfiling(_nativeHandle,
- OrtAllocator.DefaultInstance.Pointer,
+ allocator.Pointer,
 out nameHandle));
- try
+ using (var allocation = new OrtMemoryAllocation(allocator, nameHandle, 0))
 {
 return NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle);
 }
- finally
- {
- OrtAllocator.DefaultInstance.FreeMemory(nameHandle);
- }
 }

 //TODO: kept internal until implemented
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs
index 6bec88fac9896..c8ca87f5b2961 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs
@@ -217,8 +217,7 @@ public OrtMemoryAllocation(IntPtr pointer, uint size)
 ///
 /// This an instance with a piece of memory allocated
 /// by onnxruntime OrtAllocator. The same allocator will be used for
- /// for memory disposal. For memory allocated elsewhere, the instance will not own the memory
- /// and will not dispose of it.
+ /// memory disposal.
 ///
 ///
 ///
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index c5bf998cafb3f..b3c1c439e14dd 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -865,9 +865,9 @@ struct OrtApi {
  /**
   * The function will bind the OrtValue to a device which specification is contained within OrtMemoryInfo
-  * You can either create an instance ofr OrtMemoryInfo or obtain one from the allocator that you are using.
-  * This is useful when one or more outputs have dynamic shapes and, it is hard to create an output OrtValue with
-  * a Tensor ahead of time.
+  * You can either create an instance of OrtMemoryInfo with a device id or obtain one from the allocator that you created or are using.
+  * This is useful when one or more outputs have dynamic shapes and it is hard to pre-allocate and bind a chunk of
+  * memory within OrtValue ahead of time.
   *
   * \param binding_ptr - an instance of OrtIoBinding created by CreateIoBinding()
   * \param name - name for the model output
@@ -876,6 +876,25 @@ struct OrtApi {
   */
  ORT_API2_STATUS(BindOutputToDevice, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtMemoryInfo* val_ptr);

+  /**
+  * The function returns the names of the output in the order they were bound. This is useful after running the model
+  * with bound outputs because the returned names are in the order in which output OrtValues are returned. This API is optional
+  * to use. If you knew the order of outputs you used for binding you would not need to use this API.
+  *
+  * \param binding_ptr - a ptr to an instance of OrtIoBinding obtained from CreateIoBinding()
+  * \param allocator - a ptr to an instance of OrtAllocator obtained with CreateAllocator() or GetAllocatorWithDefaultOptions()
+  *                    the specified allocator will be used to allocate contiguous buffers for output strings and lengths.
+  * \param buffer - pointer to a contiguous buffer of non-zero terminated UTF-8 encoded strings. The number of strings stored is returned in the count parameter.
+  *                 this buffer will be allocated with the specified allocator and must be freed after it is no longer needed.
+  * \param lengths - a pointer to a contiguous buffer of size_t lengths of strings returned in the buffer. The number of items is returned
+  *                  in the count. This buffer is allocated with the specified allocator and must be freed after it is no longer needed.
+  * \param count - is the number of strings returned. If the instance of OrtIoBinding has no bound outputs, zero is returned,
+  * no memory allocation is performed and buffer and lengths are nullptr on return.
+  */
+  ORT_API2_STATUS(GetBoundOutputNames, _Inout_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator,
+                  _Out_ char** buffer, _Out_writes_all_(count) size_t** lengths, _Out_ size_t* count);
+
  /** Clears any previously specified bindings for inputs/outputs */
  void(ORT_API_CALL* ClearBoundInputs)(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION ORT_ALL_ARGS_NONNULL;
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
index b99f486d3bc21..160d464ff2a56 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -333,10 +333,15 @@ struct Allocator : public Base<OrtAllocator> {
 };

 struct IoBinding : public Base<OrtIoBinding> {
+ private:
+  std::vector<std::string> GetOutputNames(OrtAllocator*) const;
+ public:
   explicit IoBinding(Session& session);
   void BindInput(const char* name, const Value&);
   void BindOutput(const char* name, const Value&);
   void BindOutput(const char* name, const MemoryInfo&);
+  std::vector<std::string> GetOutputNames() const;
+  std::vector<std::string> GetOutputNames(Allocator&) const;
   void ClearBoundInputs();
   void ClearBoundOutputs();
 };
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
index cc509be474a5a..ab6a775542356 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -115,6 +115,42 @@ inline void IoBinding::BindOutput(const char* name, const MemoryInfo& mem_info)
   ThrowOnError(GetApi().BindOutputToDevice(p_, name, mem_info));
 }

+inline std::vector<std::string> IoBinding::GetOutputNames(OrtAllocator* allocator) const {
+  std::vector<std::string> result;
+  auto free_fn = [allocator](void* p) { if (p) allocator->Free(allocator, p); };
+  using Ptr = std::unique_ptr<void, decltype(free_fn)>;
+
+  char* buffer = nullptr;
+  size_t* lengths = nullptr;
+  size_t count = 0;
+  ThrowOnError(GetApi().GetBoundOutputNames(p_, allocator, &buffer, &lengths, &count));
+
+  if (count == 0) {
+    return result;
+  }
+
+  Ptr buffer_g(buffer, free_fn);
+  Ptr lengths_g(lengths, free_fn);
+
+  result.reserve(count);
+  for (size_t i = 0; i < count; ++i) {
+    auto sz = *lengths;
+    result.emplace_back(buffer, sz);
+    buffer += sz;
+    ++lengths;
+  }
+  return result;
+}
+
+inline std::vector<std::string> IoBinding::GetOutputNames() const {
+  AllocatorWithDefaultOptions allocator;
+  return GetOutputNames(allocator);
+}
+
+inline std::vector<std::string> IoBinding::GetOutputNames(Allocator& allocator) const {
+  return GetOutputNames(static_cast<OrtAllocator*>(allocator));
+}
+
 inline void IoBinding::ClearBoundInputs() {
   GetApi().ClearBoundInputs(p_);
 }
diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h
index 31af210153e7b..2fe667f771760 100644
--- a/onnxruntime/core/session/inference_session.h
+++ b/onnxruntime/core/session/inference_session.h
@@ -358,11 +358,12 @@ class InferenceSession {
   * specified within mem_info
   * @param mem_info is a pointer to OrtMemoryInfo that contains requires specs
   * @return a ptr to the allocator or nullptr if not available
-  */
+  */
  AllocatorPtr GetAllocator(const OrtMemoryInfo* mem_info) const;

-  * Get InferenceSession logger.
-  */
+  /**
+  * Get InferenceSession logger.
+  */
  const logging::Logger* GetLogger() const { return session_logger_; };

 protected:
diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc
index 89888d6695e00..5f3b3ef958c27 100644
--- a/onnxruntime/core/session/onnxruntime_c_api.cc
+++ b/onnxruntime/core/session/onnxruntime_c_api.cc
@@ -532,7 +532,7 @@ struct OrtIoBinding {
 };

 ORT_API_STATUS_IMPL(OrtApis::RunWithBinding, _Inout_ OrtSession* sess, _In_opt_ const OrtRunOptions* run_options,
-                    const OrtIoBinding* binding_ptr) {
+                    const OrtIoBinding* binding_ptr) {
   API_IMPL_BEGIN
   auto session = reinterpret_cast<::onnxruntime::InferenceSession*>(sess);
   auto status = session->Run(*run_options, *binding_ptr->binding_);
@@ -590,6 +590,52 @@ ORT_API_STATUS_IMPL(OrtApis::BindOutputToDevice, _Inout_ OrtIoBinding* binding_p
   API_IMPL_END
 }

+ORT_API_STATUS_IMPL(OrtApis::GetBoundOutputNames, _Inout_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator,
+                    _Out_ char** buffer, _Out_writes_all_(count) size_t** lengths, _Out_ size_t* count) {
+  API_IMPL_BEGIN
+  const auto& output_names = binding_ptr->binding_->GetOutputNames();
+  if (output_names.empty()) {
+    *buffer = nullptr;
+    *lengths = nullptr;
+    *count = 0U;
+    return nullptr;
+  }
+
+  IAllocatorUniquePtr<size_t> lengths_alloc(reinterpret_cast<size_t*>(allocator->Alloc(allocator, output_names.size() * sizeof(size_t))),
+                                            [allocator](size_t* p) { if (p) allocator->Free(allocator, p); });
+
+  if (!lengths_alloc) {
+    return OrtApis::CreateStatus(ORT_FAIL, "lengths allocation failed");
+  }
+
+  size_t total_len = 0;
+  auto* len_ptr = lengths_alloc.get();
+  for (const auto& n : output_names) {
+    auto sz = n.size();
+    total_len += sz;
+    *len_ptr++ = sz;
+  }
+
+  IAllocatorUniquePtr<char> buffer_alloc(reinterpret_cast<char*>(allocator->Alloc(allocator, total_len * sizeof(char))),
+                                         [allocator](char* p) { if (p) allocator->Free(allocator, p); });
+
+  if (!buffer_alloc) {
+    return OrtApis::CreateStatus(ORT_FAIL, "string buffer allocation failed");
+  }
+
+  char* buf_ptr = buffer_alloc.get();
+  for (const auto& n : output_names) {
+    auto sz = n.size();
+    memcpy(buf_ptr, n.data(), sz);
+    buf_ptr += sz;
+  }
+
+  *buffer = buffer_alloc.release();
+  *lengths = lengths_alloc.release();
+  *count = output_names.size();
+  return nullptr;
+  API_IMPL_END
+}

 void OrtApis::ClearBoundInputs(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION {
   binding_ptr->binding_->ClearInputs();
@@ -1699,13 +1745,13 @@
     &OrtApis::BindInput,
     &OrtApis::BindOutput,
     &OrtApis::BindOutputToDevice,
+    &OrtApis::GetBoundOutputNames,
     &OrtApis::ClearBoundInputs,
     &OrtApis::ClearBoundOutputs,

     // feel free to add/remove/rearrange here
     &OrtApis::GetAvailableProviders,
-    &OrtApis::ReleaseAvailableProviders
-};
+    &OrtApis::ReleaseAvailableProviders};
diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h
index 1252eece39bf4..c1b54f44203d4 100644
--- a/onnxruntime/core/session/ort_apis.h
+++ b/onnxruntime/core/session/ort_apis.h
@@ -210,6 +210,8 @@ ORT_API(void, ReleaseIoBinding, _Frees_ptr_opt_ OrtIoBinding* allocator);
 ORT_API_STATUS_IMPL(BindInput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr);
 ORT_API_STATUS_IMPL(BindOutput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_
const OrtValue* val_ptr); ORT_API_STATUS_IMPL(BindOutputToDevice, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtMemoryInfo* val_ptr); +ORT_API_STATUS_IMPL(GetBoundOutputNames, _Inout_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator, + _Out_ char** buffer, _Out_writes_all_(count) size_t** lengths, _Out_ size_t* count); ORT_API(void, ClearBoundInputs, _Inout_ OrtIoBinding* binding_ptr); ORT_API(void, ClearBoundOutputs, _Inout_ OrtIoBinding* binding_ptr); diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 183a2747f8faf..07c06ea24a796 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -424,7 +424,7 @@ TEST(CApiTest, get_allocator_cuda) { } #endif -TEST(CApiTest, io_bnding) { +TEST(CApiTest, io_binding) { Ort::SessionOptions session_options; OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1); Ort::Session session(*ort_env, MODEL_URI, session_options); @@ -448,7 +448,13 @@ TEST(CApiTest, io_bnding) { binding.BindOutput("Y", bound_y); session.Run(Ort::RunOptions(), binding); + // Check the values against the bound raw memory ASSERT_TRUE(std::equal(std::cbegin(y_values), std::cend(y_values), std::cbegin(expected_y))); + + std::vector output_names = binding.GetOutputNames(); + ASSERT_EQ(1U, output_names.size()); + ASSERT_EQ(output_names[0].compare("Y"), 0); + binding.ClearBoundInputs(); binding.ClearBoundOutputs(); } From 202d9e97eee542f98960bad9bae6f6130e2f516f Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 27 Jul 2020 10:29:32 -0700 Subject: [PATCH 18/39] Fix docs and merge issues. --- .../onnxruntime/core/session/onnxruntime_c_api.h | 16 ++++++++++------ onnxruntime/core/session/onnxruntime_c_api.cc | 2 +- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index daaa4094d3291..de5e98ae42149 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -894,6 +894,9 @@ struct OrtApi { ORT_API2_STATUS(GetBoundOutputNames, _Inout_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator, _Out_ char** buffer, _Out_writes_all_(count) size_t** lengths, _Out_ size_t* count); + //ORT_API2_STATUS(GetBoundOutputValues, _Inout_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator, + // __Out_ OrtValue** output, _Out_ size_t* count); + /** Clears any previously specified bindings for inputs/outputs */ @@ -920,13 +923,14 @@ struct OrtApi { _In_ int providers_length); /** - * \param value A tensor created from OrtCreateTensor... function. - * \param index index of string tensor element, length of element at index will be returned. + * \param value - A tensor created from OrtCreateTensor... function. + * \param index - index of string tensor element, length of element at index will be returned. + * \param out - number of UTF-8 bytes that the string contains */ ORT_API2_STATUS(GetStringTensorElementLength, _In_ const OrtValue* value, size_t index, _Out_ size_t* out); /** - * \param s string element contents. The string is NOT null-terminated. + * \param s string element contents in UTF-8 encoding. The string is NOT null-terminated. * \param value A tensor created from OrtCreateTensor... function. * \param s_len element length, get it from OrtGetStringTensorElementLength. * \param index offset of element of tensor to return. 
@@ -934,9 +938,9 @@ struct OrtApi {
   ORT_API2_STATUS(GetStringTensorElement, _In_ const OrtValue* value, size_t s_len, size_t index, _Out_writes_bytes_all_(s_len) void* s);

   /**
-  * \param value A tensor created from OrtCreateTensor... function.
-  * \param s A null terminated string.
-  * \param index index of string tensor element to fill
+  * \param value - A tensor created from OrtCreateTensor... function.
+  * \param s - A null terminated UTF-8 encoded string.
+  * \param index - index of string tensor element to fill
   */
  ORT_API2_STATUS(FillStringTensorElement, _Inout_ OrtValue* value, _In_ const char* s, size_t index);
 };
diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc
index 444172fb4b4c7..7e98b150fad85 100644
--- a/onnxruntime/core/session/onnxruntime_c_api.cc
+++ b/onnxruntime/core/session/onnxruntime_c_api.cc
@@ -1782,7 +1782,7 @@ static constexpr OrtApi ort_api_1_to_4 = {

     // Version 4 - In development

-    // Allocator and Binding APIs are exposed via C# API , do not move
+    // Allocator extension and Binding APIs are exposed via C# API, do not move
     &OrtApis::CreateAllocator,
     &OrtApis::ReleaseAllocator,
     &OrtApis::RunWithBinding,
From 0ea6132603e2a9e99095f7cf5dead257126c44e3 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov
Date: Mon, 27 Jul 2020 14:50:32 -0700
Subject: [PATCH 19/39] Implement GetBoundOutputValues and test.
---
 .../core/session/onnxruntime_c_api.h | 24 ++++++---
 .../core/session/onnxruntime_cxx_api.h | 6 +++
 .../core/session/onnxruntime_cxx_inline.h | 51 ++++++++++++++++++-
 onnxruntime/core/session/onnxruntime_c_api.cc | 45 +++++++++++++++-
 onnxruntime/core/session/ort_apis.h | 4 +-
 onnxruntime/test/shared_lib/test_inference.cc | 44 ++++++++++++++--
 6 files changed, 162 insertions(+), 12 deletions(-)
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h
index de5e98ae42149..c4e59ab2c5379 100644
--- a/include/onnxruntime/core/session/onnxruntime_c_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -877,9 +877,9 @@ struct OrtApi {
  ORT_API2_STATUS(BindOutputToDevice, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtMemoryInfo* val_ptr);

  /**
-  * The function returns the names of the output in the order they were bound. This is useful after running the model
+  * The function returns the names of the outputs in the order they were bound. This is useful after running the model
   * with bound outputs because the returned names are in the order in which output OrtValues are returned. This API is optional
-  * to use. If you knew the order of outputs you used for binding you would not need to use this API.
+  * to use. If you knew the order of outputs and their names you used for binding you would not need to use this API.
   *
   * \param binding_ptr - a ptr to an instance of OrtIoBinding obtained from CreateIoBinding()
   * \param allocator - a ptr to an instance of OrtAllocator obtained with CreateAllocator() or GetAllocatorWithDefaultOptions()
@@ -891,12 +891,24 @@ struct OrtApi {
   * \param count - is the number of strings returned. If the instance of OrtIoBinding has no bound outputs, zero is returned,
   * no memory allocation is performed and buffer and lengths are nullptr on return.
  */
- ORT_API2_STATUS(GetBoundOutputNames, _Inout_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator,
+ ORT_API2_STATUS(GetBoundOutputNames, _In_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator,
                  _Out_ char** buffer, _Out_writes_all_(count) size_t** lengths, _Out_ size_t* count);

- //ORT_API2_STATUS(GetBoundOutputValues, _Inout_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator,
- //                __Out_ OrtValue** output, _Out_ size_t* count);
-
+ /**
+ * The function returns an array of pointers to individually allocated OrtValues that contain the results of a model execution with RunWithBinding().
+ * The array contains the same number of OrtValues and they are in the same order as they were bound with BindOutput()
+ * or BindOutputToDevice().
+ * The returned OrtValues must be individually released after they are no longer needed.
+ * The array is allocated using the specified instance of the allocator and must be freed using the same allocator after
+ * all the OrtValues contained therein are individually released.
+ *
+ * \param binding_ptr - instance of OrtIoBinding
+ * \param allocator - instance of allocator to allocate output array
+ * \param output - pointer to the allocated buffer. Returns nullptr if no outputs.
+ * \param output_count - pointer to the number of OrtValues returned. Zero if no outputs.
+ */
+ ORT_API2_STATUS(GetBoundOutputValues, _In_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator,
+                 _Out_writes_all_(output_count) OrtValue*** output, _Out_ size_t* output_count);

 /** Clears any previously specified bindings for inputs/outputs */
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
index 4f7031c17e502..2b5cbc19a699c 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -296,6 +296,9 @@ struct Value : Base<OrtValue> {
   template <typename T>
   T* GetTensorMutableData();

+  template <typename T>
+  const T* GetTensorData() const;
+
   TypeInfo GetTypeInfo() const;
   TensorTypeAndShapeInfo GetTensorTypeAndShapeInfo() const;
@@ -341,6 +344,7 @@ struct Allocator : public Base<OrtAllocator> {
 struct IoBinding : public Base<OrtIoBinding> {
  private:
   std::vector<std::string> GetOutputNames(OrtAllocator*) const;
+  std::vector<Value> GetOutputValues(OrtAllocator*) const;
  public:
   explicit IoBinding(Session& session);
   void BindInput(const char* name, const Value&);
@@ -348,6 +352,8 @@ struct IoBinding : public Base<OrtIoBinding> {
   void BindOutput(const char* name, const MemoryInfo&);
   std::vector<std::string> GetOutputNames() const;
   std::vector<std::string> GetOutputNames(Allocator&) const;
+  std::vector<Value> GetOutputValues() const;
+  std::vector<Value> GetOutputValues(Allocator&) const;
   void ClearBoundInputs();
   void ClearBoundOutputs();
 };
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
index 203cedef44a88..604ed426d5b3c 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -80,7 +80,7 @@ inline MemoryInfo::MemoryInfo(const char* name, OrtAllocatorType type, int id, O

 inline Allocator::Allocator(const Session& sess, const MemoryInfo& mem_info) {
   ThrowOnError(GetApi().CreateAllocator(sess.operator const OrtSession*(),
-  mem_info.operator const OrtMemoryInfo*(), &p_));
+                                        mem_info.operator const OrtMemoryInfo*(), &p_));
 }

 inline void* Allocator::Alloc(size_t size) const {
@@ -151,6 +151,48 @@ inline std::vector<std::string> IoBinding::GetOutputNames(Allocator& allocator)
   return
diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
index 203cedef44a88..604ed426d5b3c 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -80,7 +80,7 @@ inline MemoryInfo::MemoryInfo(const char* name, OrtAllocatorType type, int id, O
 inline Allocator::Allocator(const Session& sess, const MemoryInfo& mem_info) {
   ThrowOnError(GetApi().CreateAllocator(sess.operator const OrtSession*(),
-                                       mem_info.operator const OrtMemoryInfo*(), &p_));
+                                        mem_info.operator const OrtMemoryInfo*(), &p_));
 }

 inline void* Allocator::Alloc(size_t size) const {
@@ -151,6 +151,48 @@ inline std::vector<std::string> IoBinding::GetOutputNames(Allocator& allocator)
   return GetOutputNames(allocator);
 }

+inline std::vector<Value> Ort::IoBinding::GetOutputValues(OrtAllocator* allocator) const {
+  std::vector<Value> result;
+  size_t owned = 0;
+  size_t output_count = 0;
+  // Lambda to release the buffer when no longer needed and
+  // to make sure that we destroy all instances on exception
+  auto free_fn = [&owned, &output_count, allocator](OrtValue** buffer) {
+    if (buffer) {
+      while (owned < output_count) {
+        auto* p = buffer + owned++;
+        GetApi().ReleaseValue(*p);
+      }
+      allocator->Free(allocator, buffer);
+    }
+  };
+  using Ptr = std::unique_ptr<OrtValue*, decltype(free_fn)>;
+
+  OrtValue** output_buffer = nullptr;
+  ThrowOnError(GetApi().GetBoundOutputValues(p_, allocator, &output_buffer, &output_count));
+  if (output_count == 0) {
+    return result;
+  }
+
+  Ptr buffer_g(output_buffer, free_fn);
+
+  result.reserve(output_count);
+  for (size_t i = 0; i < output_count; ++i) {
+    result.emplace_back(output_buffer[i]);
+    ++owned;
+  }
+  return result;
+}
+
+inline std::vector<Value> Ort::IoBinding::GetOutputValues(Allocator& allocator) const {
+  return GetOutputValues(allocator);
+}
+
+inline std::vector<Value> Ort::IoBinding::GetOutputValues() const {
+  AllocatorWithDefaultOptions allocator;
+  return GetOutputValues(allocator);
+}
+
 inline void IoBinding::ClearBoundInputs() {
   GetApi().ClearBoundInputs(p_);
 }
@@ -593,6 +635,13 @@ T* Value::GetTensorMutableData() {
   return out;
 }

+template <typename T>
+const T* Value::GetTensorData() const {
+  T* out;
+  ThrowOnError(GetApi().GetTensorMutableData(p_, (void**)&out));
+  return out;
+}
+
 inline TypeInfo Value::GetTypeInfo() const {
   OrtTypeInfo* output;
   ThrowOnError(GetApi().GetTypeInfo(p_, &output));
diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc
index 7e98b150fad85..dbf25a9a6b4a4 100644
--- a/onnxruntime/core/session/onnxruntime_c_api.cc
+++ b/onnxruntime/core/session/onnxruntime_c_api.cc
@@ -590,7 +590,7 @@ ORT_API_STATUS_IMPL(OrtApis::BindOutputToDevice, _Inout_ OrtIoBinding* binding_p
   API_IMPL_END
 }

-ORT_API_STATUS_IMPL(OrtApis::GetBoundOutputNames, _Inout_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator,
+ORT_API_STATUS_IMPL(OrtApis::GetBoundOutputNames, _In_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator,
                     _Out_ char** buffer, _Out_writes_all_(count) size_t** lengths, _Out_ size_t* count) {
   API_IMPL_BEGIN
   const auto& output_names = binding_ptr->binding_->GetOutputNames();
@@ -637,6 +637,48 @@ ORT_API_STATUS_IMPL(OrtApis::GetBoundOutputNames, _Inout_ const OrtIoBi
   API_IMPL_END
 }

+ORT_API_STATUS_IMPL(OrtApis::GetBoundOutputValues, _In_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator,
+                    _Out_writes_all_(output_count) OrtValue*** output, _Out_ size_t* output_count) {
+  API_IMPL_BEGIN
+  const auto& outputs = binding_ptr->binding_->GetOutputs();
+  if (outputs.empty()) {
+    *output = nullptr;
+    *output_count = 0U;
+    return nullptr;
+  }
+
+  // Used to destroy and de-allocate on exception
+  size_t created = 0;
+  IAllocatorUniquePtr<OrtValue*> ortvalues_alloc(reinterpret_cast<OrtValue**>(allocator->Alloc(allocator, outputs.size() * sizeof(OrtValue*))),
+                                                 [&created, allocator](OrtValue** buffer) {
+                                                   if (buffer) {
+                                                     while (created > 0) {
+                                                       auto p = buffer + --created;
+                                                       delete (*p);
+                                                     }
+                                                     allocator->Free(allocator, buffer);
+                                                   }
+                                                 });
+
+  if (!ortvalues_alloc) {
+    return OrtApis::CreateStatus(ORT_FAIL, "Output buffer allocation failed");
+  }
+
+  OrtValue** out_ptr = ortvalues_alloc.get();
+  for (const auto& out_value : outputs) {
+    *out_ptr = new OrtValue(out_value);
+    // Advance out_ptr so that each slot of the buffer receives a distinct OrtValue
+    ++out_ptr;
+    ++created;
+  }
+
+  assert(created == outputs.size());
+
+  *output = ortvalues_alloc.release();
+  *output_count = created;
+  return nullptr;
+  API_IMPL_END
+}
+
 void OrtApis::ClearBoundInputs(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION {
   binding_ptr->binding_->ClearInputs();
 }
@@ -1792,6 +1834,7 @@ static constexpr OrtApi ort_api_1_to_4 = {
     &OrtApis::BindOutput,
     &OrtApis::BindOutputToDevice,
     &OrtApis::GetBoundOutputNames,
+    &OrtApis::GetBoundOutputValues,
     &OrtApis::ClearBoundInputs,
     &OrtApis::ClearBoundOutputs,

diff --git a/onnxruntime/core/session/ort_apis.h b/onnxruntime/core/session/ort_apis.h
index fd2acc54db17e..478dd45168b84 100644
--- a/onnxruntime/core/session/ort_apis.h
+++ b/onnxruntime/core/session/ort_apis.h
@@ -213,8 +213,10 @@ ORT_API(void, ReleaseIoBinding, _Frees_ptr_opt_ OrtIoBinding* allocator);
 ORT_API_STATUS_IMPL(BindInput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr);
 ORT_API_STATUS_IMPL(BindOutput, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtValue* val_ptr);
 ORT_API_STATUS_IMPL(BindOutputToDevice, _Inout_ OrtIoBinding* binding_ptr, _In_ const char* name, _In_ const OrtMemoryInfo* val_ptr);
-ORT_API_STATUS_IMPL(GetBoundOutputNames, _Inout_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator,
+ORT_API_STATUS_IMPL(GetBoundOutputNames, _In_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator,
                     _Out_ char** buffer, _Out_writes_all_(count) size_t** lengths, _Out_ size_t* count);
+ORT_API_STATUS_IMPL(GetBoundOutputValues, _In_ const OrtIoBinding* binding_ptr, _In_ OrtAllocator* allocator,
+                    _Out_writes_all_(output_count) OrtValue*** output, _Out_ size_t* output_count);
 ORT_API(void, ClearBoundInputs, _Inout_ OrtIoBinding* binding_ptr);
 ORT_API(void, ClearBoundOutputs, _Inout_ OrtIoBinding* binding_ptr);

diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc
index 5d385198e655c..572b8f575e620 100644
--- a/onnxruntime/test/shared_lib/test_inference.cc
+++ b/onnxruntime/test/shared_lib/test_inference.cc
@@ -451,9 +451,47 @@ TEST(CApiTest, io_binding) {
   // Check the values against the bound raw memory
   ASSERT_TRUE(std::equal(std::cbegin(y_values), std::cend(y_values), std::cbegin(expected_y)));

-  std::vector<std::string> output_names = binding.GetOutputNames();
-  ASSERT_EQ(1U, output_names.size());
-  ASSERT_EQ(output_names[0].compare("Y"), 0);
+  // Now compare values via GetOutputValues
+  {
+    std::vector<Ort::Value> output_values = binding.GetOutputValues();
+    ASSERT_EQ(output_values.size(), 1U);
+    const Ort::Value& Y_value = output_values[0];
+    ASSERT_TRUE(Y_value.IsTensor());
+    Ort::TensorTypeAndShapeInfo type_info = Y_value.GetTensorTypeAndShapeInfo();
+    ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, type_info.GetElementType());
+    auto count = type_info.GetElementCount();
+    ASSERT_EQ(expected_y.size(), count);
+    const float* values = Y_value.GetTensorData<float>();
+    ASSERT_TRUE(std::equal(values, values + count, std::cbegin(expected_y)));
+  }
+
+  {
+    std::vector<std::string> output_names = binding.GetOutputNames();
+    ASSERT_EQ(1U, output_names.size());
+    ASSERT_EQ(output_names[0].compare("Y"), 0);
+  }
+
+  // Now replace the binding of Y with an on-device binding instead of pre-allocated memory.
+  // This is for cases when we can not allocate an OrtValue ahead of time due to unknown dimensions.
+  {
+    Ort::MemoryInfo info_cpu_dev("Cpu", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault);
+    binding.BindOutput("Y", info_cpu_dev);
+    session.Run(Ort::RunOptions(), binding);
+  }
+
+  // Check the output value allocated based on the device binding.
+  {
+    std::vector<Ort::Value> output_values = binding.GetOutputValues();
+    ASSERT_EQ(output_values.size(), 1U);
+    const Ort::Value& Y_value = output_values[0];
+    ASSERT_TRUE(Y_value.IsTensor());
+    Ort::TensorTypeAndShapeInfo type_info = Y_value.GetTensorTypeAndShapeInfo();
+    ASSERT_EQ(ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT, type_info.GetElementType());
+    auto count = type_info.GetElementCount();
+    ASSERT_EQ(expected_y.size(), count);
+    const float* values = Y_value.GetTensorData<float>();
+    ASSERT_TRUE(std::equal(values, values + count, std::cbegin(expected_y)));
+  }

   binding.ClearBoundInputs();
   binding.ClearBoundOutputs();

From 2910a0c6b11b390191978cbec17e93197842ea38 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov
Date: Mon, 27 Jul 2020 18:45:33 -0700
Subject: [PATCH 20/39] Finish implementation of OrtIoBinding

---
 .../DisposableNamedOnnxValue.cs               |  13 +-
 .../InferenceSession.cs                       |  50 +++-
 .../src/Microsoft.ML.OnnxRuntime/IoBinding.cs | 117 ---------
 .../Microsoft.ML.OnnxRuntime/NativeMethods.cs |  70 ++++-
 .../NativeOnnxValueHelper.cs                  |   7 +-
 .../Microsoft.ML.OnnxRuntime/OrtAllocator.cs  |   6 +-
 .../Microsoft.ML.OnnxRuntime/OrtIoBinding.cs  | 245 ++++++++++++++++++
 .../src/Microsoft.ML.OnnxRuntime/OrtValue.cs  |  22 +-
 8 files changed, 381 insertions(+), 149 deletions(-)
 delete mode 100644 csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs
 create mode 100644 csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs
index 08cf3dc6810f2..cb3f01ce89b50 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs
@@ -182,13 +182,10 @@ internal static DisposableNamedOnnxValue CreateTensorFromOnnxValue(string name,

         internal static DisposableNamedOnnxValue CreateFromOnnxValue(string name, IntPtr nativeOnnxValue)
         {
-            IntPtr allocator = IntPtr.Zero;
-            NativeApiStatus.VerifySuccess(NativeMethods.OrtGetAllocatorWithDefaultOptions(out allocator));
-            var ret = CreateFromOnnxValue(name, nativeOnnxValue, allocator);
-            return (DisposableNamedOnnxValue)ret;
+            return CreateFromOnnxValue(name, nativeOnnxValue, OrtAllocator.DefaultInstance);
         }

-        internal static DisposableNamedOnnxValue CreateFromOnnxValue(string name, IntPtr nativeOnnxValue, IntPtr allocator)
+        internal static DisposableNamedOnnxValue CreateFromOnnxValue(string name, IntPtr nativeOnnxValue, OrtAllocator allocator)
         {
             OnnxValueType onnxValueType;
             unsafe
@@ -207,7 +204,7 @@ internal static DisposableNamedOnnxValue CreateFromOnnxValue(string name, IntPtr
                 for (int i = 0; i < count.ToInt32(); i++)
                 {
                     IntPtr nativeOnnxValueSeq;
-                    NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValue(nativeOnnxValue, i, allocator, out nativeOnnxValueSeq));
+                    NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValue(nativeOnnxValue, i, allocator.Pointer, out nativeOnnxValueSeq));
                     sequence.Add(CreateFromOnnxValue(string.Empty, nativeOnnxValueSeq, allocator));
                 }
                 return new DisposableNamedOnnxValue(name, sequence, OnnxValueType.ONNX_TYPE_SEQUENCE, TensorElementType.DataTypeMax, null);
@@ -217,8 +214,8 @@ internal static DisposableNamedOnnxValue CreateFromOnnxValue(string name, IntPtr
                    IntPtr nativeOnnxValueMapKeys = IntPtr.Zero;
                    IntPtr nativeOnnxValueMapValues = IntPtr.Zero;
                    TensorElementType elemType = TensorElementType.DataTypeMax;
-                   NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValue(nativeOnnxValue, 0, allocator, out nativeOnnxValueMapKeys));
-                   NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValue(nativeOnnxValue, 1, allocator, out nativeOnnxValueMapValues));
+                   NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValue(nativeOnnxValue, 0, allocator.Pointer, out nativeOnnxValueMapKeys));
+                   NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValue(nativeOnnxValue, 1, allocator.Pointer, out nativeOnnxValueMapValues));
                    NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorTypeAndShape(nativeOnnxValueMapKeys, out typeAndShape));
                    unsafe
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
index 47107fe6a44fb..d9ded19551c36 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
@@ -695,23 +695,63 @@ public void Run(
             }
         }

-        public IoBinding CreateIOBinding()
+        public OrtIoBinding CreateIOBinding()
         {
-            return new IoBinding(this);
+            return new OrtIoBinding(this);
         }

         /// <summary>
         /// Make this method return a collection of DisposableNamedOnnxValue as in other interfaces
-        /// Query names from IoBinding object and pair then with the array of OrtValues returned
+        /// Query names from OrtIoBinding object and pair them with the array of OrtValues returned
         ///
         /// This method will run inference and will return outputs with names for the outputs
         /// previously bound to ioBinding instance.
         /// </summary>
         /// <param name="runOptions">RunOptions</param>
-        /// <param name="ioBinding">IoBinding instance with bindings</param>
-        public void Run(RunOptions runOptions, IoBinding ioBinding)
+        /// <param name="ioBinding">OrtIoBinding instance with bindings</param>
+        /// <param name="names">optional parameter. If you already know the names of the outputs you can save a native
+        /// call to retrieve parameter names. They will be paired with the returned OrtValues and combined into DisposableNamedOnnxValues.
+        /// Otherwise, the method will retrieve output names from the OrtIoBinding instance.
+        /// It is an error if you supply a different number of names than the returned outputs</param>
+        public IDisposableReadOnlyCollection<DisposableNamedOnnxValue> Run(RunOptions runOptions, OrtIoBinding ioBinding, string[] names = null)
         {
             NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle));
+            using (var ortValues = ioBinding.GetOutputValues())
+            {
+                string[] outputNames = names;
+                if (outputNames == null)
+                {
+                    outputNames = ioBinding.GetOutputNames();
+                }
+
+                if (outputNames.Length != ortValues.Count)
+                {
+                    if (names != null)
+                    {
+                        throw new OnnxRuntimeException(ErrorCode.InvalidArgument,
+                            "Number of specified names: " + names.Length + " does not match the output number: " +
+                            ortValues.Count);
+                    }
+                    throw new OnnxRuntimeException(ErrorCode.EngineError,
+                        "BUG check. 
Number of fetched output names does not match number of outputs"); + } + + var result = new DisposableList(outputNames.Length); + try + { + for (int i = 0; i < outputNames.Length; ++i) + { + var ortValue = ortValues.ElementAt(i); + result.Add(DisposableNamedOnnxValue.CreateTensorFromOnnxValue(outputNames[i], ortValue.Handle)); + ortValue.Disown(); + } + } catch(Exception e) + { + result.Dispose(); + throw e; + } + return result; + } } /// diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs b/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs deleted file mode 100644 index a0264d7cb3488..0000000000000 --- a/csharp/src/Microsoft.ML.OnnxRuntime/IoBinding.cs +++ /dev/null @@ -1,117 +0,0 @@ -using System; -using System.Buffers; -using System.Collections.Generic; -using System.Runtime.InteropServices; -using System.Text; - -namespace Microsoft.ML.OnnxRuntime -{ - /// - /// This class enable to bind inputs and outputs to pre-allocated - /// memory. This enables interesting scenarios. For example, if your input - /// already resides in some pre-allocated memory even if on a device you bind - /// that piece of memory to an input name and shape and onnxruntime will use that as input. - /// Other traditional inputs can also be bound that already exists as Tensors - /// - public class IoBinding : IDisposable - { - private IntPtr _handle; - - /// - /// Use InferenceSession.CreateIOBinding() - /// - /// - internal IoBinding(InferenceSession session) - { - NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateIoBinding(session.Handle, out _handle)); - } - - internal IntPtr Handle - { - get - { - return _handle; - } - } - - public void BindInput(string name, Tensors.TensorElementType elementType, long[] shape, OrtMemoryAllocation allocation) - { - using (var ortValue = OrtValue.CreateTensorValueWithData(allocation.Info, - elementType, - shape, - allocation.Pointer, allocation.Size)) - BindIntputOrOutput(name, ortValue.Handle, true); - } - - public void BindInput(string name, FixedBufferOnnxValue fixedValue) - { - if(fixedValue.OnnxValueType != OnnxValueType.ONNX_TYPE_TENSOR) - { - throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Binding works only with Tensors"); - } - BindIntputOrOutput(name, fixedValue.Value, true); - } - - public void BindOutput(string name, Tensors.TensorElementType elementType, long[] shape, OrtMemoryAllocation allocation) - { - using (var ortValue = OrtValue.CreateTensorValueWithData(allocation.Info, - elementType, - shape, - allocation.Pointer, allocation.Size)) - BindIntputOrOutput(name, ortValue.Handle, false); - } - - public void BindOutput(string name, FixedBufferOnnxValue fixedValue) - { - if (fixedValue.OnnxValueType != OnnxValueType.ONNX_TYPE_TENSOR) - { - throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Binding works only with Tensors"); - } - BindIntputOrOutput(name, fixedValue.Value, false); - } - - private void BindIntputOrOutput(string name, IntPtr ortValue, bool isInput) - { - var utf8NamePinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(name), GCHandleType.Pinned); - using (var pinnedName = new PinnedGCHandle(utf8NamePinned)) - { - if (isInput) - { - NativeApiStatus.VerifySuccess(NativeMethods.OrtBindInput(_handle, pinnedName.Pointer, ortValue)); - } - else - { - NativeApiStatus.VerifySuccess(NativeMethods.OrtBindOutput(_handle, pinnedName.Pointer, ortValue)); - } - } - } - - public void ClearBoundInputs() - { - NativeMethods.OrtClearBoundInputs(_handle); - } - - public void ClearBoundOutputs() - { - 
NativeMethods.OrtClearBoundOutputs(_handle); - } - - #region Disposable Support - protected virtual void Dispose(bool disposing) - { - if(disposing) - { - NativeMethods.OrtReleaseIoBinding(_handle); - _handle = IntPtr.Zero; - } - } - public void Dispose() - { - Dispose(true); - GC.SuppressFinalize(this); - } - - // No need for the finalizer - #endregion - } -} diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index cf8e95ca32344..332d7676d01d8 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -164,6 +164,9 @@ public struct OrtApi public IntPtr ReleaseIoBinding; public IntPtr BindInput; public IntPtr BindOutput; + public IntPtr BindOutputToDevice; + public IntPtr GetBoundOutputNames; + public IntPtr GetBoundOutputValues; public IntPtr ClearBoundInputs; public IntPtr ClearBoundOutputs; } @@ -258,6 +261,9 @@ static NativeMethods() OrtReleaseIoBinding = (DOrtReleaseIoBinding)Marshal.GetDelegateForFunctionPointer(api_.ReleaseIoBinding, typeof(DOrtReleaseIoBinding)); OrtBindInput = (DOrtBindInput)Marshal.GetDelegateForFunctionPointer(api_.BindInput, typeof(DOrtBindInput)); OrtBindOutput = (DOrtBindOutput)Marshal.GetDelegateForFunctionPointer(api_.BindOutput, typeof(DOrtBindOutput)); + OrtBindOutputToDevice = (DOrtBindOutputToDevice)Marshal.GetDelegateForFunctionPointer(api_.BindOutputToDevice, typeof(DOrtBindOutputToDevice)); + OrtGetBoundOutputNames = (DOrtGetBoundOutputNames)Marshal.GetDelegateForFunctionPointer(api_.GetBoundOutputNames, typeof(DOrtGetBoundOutputNames)); + OrtGetBoundOutputValues = (DOrtGetBoundOutputValues)Marshal.GetDelegateForFunctionPointer(api_.GetBoundOutputValues, typeof(DOrtGetBoundOutputValues)); OrtClearBoundInputs = (DOrtClearBoundInputs)Marshal.GetDelegateForFunctionPointer(api_.ClearBoundInputs, typeof(DOrtClearBoundInputs)); OrtClearBoundOutputs = (DOrtClearBoundOutputs)Marshal.GetDelegateForFunctionPointer(api_.ClearBoundOutputs, typeof(DOrtClearBoundOutputs)); @@ -647,8 +653,8 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca /// and write model outputs to the supplied memory. /// /// session to create OrtIoBinding instance - /// out a new instance of OrtIoBinding - public delegate IntPtr DOrtCreateIoBinding(IntPtr /*(const OrtSession*)*/ session, out IntPtr /*(OrtIoBinding)*/ io_binding); + /// out a new instance of OrtIoBinding + public delegate IntPtr /* OrtStatus*/ DOrtCreateIoBinding(IntPtr /*(const OrtSession*)*/ session, out IntPtr /*(OrtIoBinding)*/ io_binding); public static DOrtCreateIoBinding OrtCreateIoBinding; /// @@ -667,7 +673,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca /// OrtValue that is used for input (may wrap arbitrary memory). /// The param instance is copied internally so this argument may be released. /// - public delegate IntPtr DOrtBindInput(IntPtr /*(OrtIoBinding)*/ io_binding, IntPtr /*(const char*)*/ name, IntPtr /*const OrtValue**/ ort_value); + public delegate IntPtr /* OrtStatus*/ DOrtBindInput(IntPtr /*(OrtIoBinding)*/ io_binding, IntPtr /*(const char*)*/ name, IntPtr /*const OrtValue**/ ort_value); public static DOrtBindInput OrtBindInput; /// @@ -679,22 +685,70 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca /// OrtValue that is used for output (may wrap arbitrary memory). /// The param instance is copied internally so this argument may be released. 
        /// </param>
-       public delegate IntPtr DOrtBindOutput(IntPtr /*(OrtIoBinding)*/ io_binding, IntPtr /*(const char*) */ name, IntPtr /*const OrtValue**/ ort_value);
+       public delegate IntPtr /* OrtStatus*/ DOrtBindOutput(IntPtr /*(OrtIoBinding)*/ io_binding, IntPtr /*(const char*) */ name, IntPtr /*const OrtValue**/ ort_value);
        public static DOrtBindOutput OrtBindOutput;

+       /// <summary>
+       /// Bind a device to the model output with the specified name.
+       /// This is useful when the OrtValue can not be allocated ahead of time
+       /// due to unknown dimensions.
+       /// </summary>
+       /// <param name="io_binding">Instance of OrtIoBinding</param>
+       /// <param name="name">UTF-8 zero terminated name</param>
+       /// <param name="mem_info">OrtMemoryInfo instance that contains the device id. May be obtained from the device specific allocator instance</param>
+       public delegate IntPtr /* OrtStatus*/ DOrtBindOutputToDevice(IntPtr /*(OrtIoBinding)*/ io_binding, IntPtr /*(const char*) */ name, IntPtr /* const OrtMemoryInfo */ mem_info);
+       public static DOrtBindOutputToDevice OrtBindOutputToDevice;
+
+       /// <summary>
+       /// The function will return all bound output names in the order they were bound.
+       /// It is the same order in which the output values will be returned after RunWithBinding() is used.
+       /// The function will make two native allocations using the allocator supplied.
+       /// The caller is responsible for deallocating both of the buffers using the same allocator.
+       /// You may use the OrtMemoryAllocation disposable class to wrap those allocations.
+       /// </summary>
+       /// <param name="io_binding">instance of OrtIoBinding</param>
+       /// <param name="allocator">allocator to use for memory allocation</param>
+       /// <param name="buffer">a contiguous buffer that contains all output names.
+       /// Names are not zero terminated; use lengths to extract the strings. This needs to be deallocated.</param>
+       /// <param name="lengths">a buffer that contains lengths (size_t) for each of the returned strings in order.
+       /// You may use Marshal.ReadIntPtr() to read each element of the array. The buffer must be deallocated.</param>
+       /// <param name="count">contains the count of names returned, which is the number of elements in lengths.</param>
+       public delegate IntPtr /* OrtStatus*/ DOrtGetBoundOutputNames(IntPtr /* (const OrtIoBinding*) */ io_binding, IntPtr /* OrtAllocator* */ allocator,
+           out IntPtr /* char** */ buffer, out IntPtr /* size_t** */ lengths, out UIntPtr count);
+       public static DOrtGetBoundOutputNames OrtGetBoundOutputNames;
+
+       /// <summary>
+       /// The function returns output values after the model has been run with RunWithBinding().
+       /// It returns a natively allocated buffer of OrtValue pointers. All of the OrtValues must be individually
+       /// released after they are no longer needed. You may use the OrtValue disposable class to wrap the native handle and properly dispose of it
+       /// in connection with DisposableList. All values are returned in the same order as they were bound.
+       /// The buffer that contains the OrtValues must be deallocated using the same allocator that was specified as an argument.
+       /// You may use an instance of OrtMemoryAllocation to properly dispose of the native memory.
+       /// </summary>
+       /// <param name="io_binding">instance of OrtIoBinding</param>
+       /// <param name="allocator">allocator to use to allocate the output buffer</param>
+       /// <param name="ortvalues">allocated buffer that contains pointers (IntPtr) to individual OrtValue instances</param>
+       /// <param name="count">count of OrtValues returned</param>
+       public delegate IntPtr /* OrtStatus*/ DOrtGetBoundOutputValues(IntPtr /* (const OrtIoBinding*) */ io_binding, IntPtr /* OrtAllocator* */ allocator,
+           out IntPtr /* OrtValue** */ ortvalues, out UIntPtr count);
+       public static DOrtGetBoundOutputValues OrtGetBoundOutputValues;
+
        /// <summary>
        /// Clears Input bindings. This is a convenience method.
-       /// Releasing IoBinding instance would clear all bound inputs.
+       /// Releasing OrtIoBinding instance would clear all bound inputs.
        /// </summary>
-       /// <param name="io_binding">instance of OrtIoBinding</param>
+       /// <param name="io_binding">instance of OrtIoBinding</param>
        public delegate void DOrtClearBoundInputs(IntPtr /*(OrtIoBinding)*/ io_binding);
        public static DOrtClearBoundInputs OrtClearBoundInputs;

        /// <summary>
        /// Clears Output bindings. This is a convenience method.
-       /// Releasing IoBinding instance would clear all bound outputs.
+       /// Releasing OrtIoBinding instance would clear all bound outputs.
        /// </summary>
-       /// <param name="io_binding">instance of OrtIoBinding</param>
+       /// <param name="io_binding">instance of OrtIoBinding</param>
        public delegate void DOrtClearBoundOutputs(IntPtr /*(OrtIoBinding)*/ io_binding);
        public static DOrtClearBoundOutputs OrtClearBoundOutputs;

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs
index 08658c647dd44..5bbb12b196aab 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs
@@ -385,7 +385,7 @@ private static bool TryPinAsTensor(
        /// <returns>UTF-8 encoded equivalent</returns>
        internal static byte[] StringToZeroTerminatedUtf8(string s)
        {
-           return UTF8Encoding.UTF8.GetBytes(s + '\0');
+           return UTF8Encoding.UTF8.GetBytes(s + Char.MinValue);
        }

        /// <summary>
@@ -399,6 +399,11 @@ internal static string StringFromNativeUtf8(IntPtr nativeUtf8)
            // .NET 5.0 has Marshal.PtrToStringUTF8 that does the below
            int len = 0;
            while (Marshal.ReadByte(nativeUtf8, len) != 0) ++len;
+           return StringFromNativeUtf8(nativeUtf8, 0, len);
+       }
+
+       internal static string StringFromNativeUtf8(IntPtr nativeUtf8, int offset, int len)
+       {
            byte[] buffer = new byte[len];
-           Marshal.Copy(nativeUtf8, buffer, 0, buffer.Length);
+           Marshal.Copy(nativeUtf8 + offset, buffer, 0, buffer.Length);
            return Encoding.UTF8.GetString(buffer, 0, buffer.Length);
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs
index c8ca87f5b2961..b7efbc3a382d7 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs
@@ -82,9 +82,9 @@ internal OrtMemoryInfo(IntPtr allocInfo, bool owned)

        // Predefined utf8 encoded allocator names. Use them to construct an instance of
        // OrtMemoryInfo
-       public static readonly byte[] allocatorCPU = Encoding.UTF8.GetBytes("Cpu" + '\0');
-       public static readonly byte[] allocatorCUDA = Encoding.UTF8.GetBytes("Cuda" + '\0');
-       public static readonly byte[] allocatorCUDA_PINNED = Encoding.UTF8.GetBytes("CudaPinned" + '\0');
+       public static readonly byte[] allocatorCPU = Encoding.UTF8.GetBytes("Cpu" + Char.MinValue);
+       public static readonly byte[] allocatorCUDA = Encoding.UTF8.GetBytes("Cuda" + Char.MinValue);
+       public static readonly byte[] allocatorCUDA_PINNED = Encoding.UTF8.GetBytes("CudaPinned" + Char.MinValue);
        /// <summary>
        /// Create an instance of OrtMemoryInfo according to the specification
        /// Memory info instances are usually used to get a handle of a native allocator
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs
new file mode 100644
index 0000000000000..7626ceb78fc89
--- /dev/null
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs
@@ -0,0 +1,245 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+using System;
+using System.Runtime.InteropServices;
+
+namespace Microsoft.ML.OnnxRuntime
+{
+    /// <summary>
+    /// This class enables binding of inputs and outputs to pre-allocated
+    /// memory. This enables interesting scenarios. For example, if your input
+    /// already resides in some pre-allocated memory, even on a device, you bind
+    /// that piece of memory to an input name and shape, and onnxruntime will use that as input.
+    /// Other traditional inputs that already exist as Tensors can also be bound.
+    /// </summary>
+    public class OrtIoBinding : IDisposable
+    {
+        private IntPtr _handle;
+
+        /// <summary>
+        /// Use InferenceSession.CreateIOBinding()
+        /// </summary>
+        /// <param name="session"></param>
+        internal OrtIoBinding(InferenceSession session)
+        {
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateIoBinding(session.Handle, out _handle));
+        }
+
+        internal IntPtr Handle
+        {
+            get
+            {
+                return _handle;
+            }
+        }
+
+        /// <summary>
+        /// Bind a piece of pre-allocated native memory as an OrtValue Tensor with a given shape
+        /// to an input with a given name. The model will read the specified input from that memory,
+        /// possibly avoiding the need to copy between devices. OrtMemoryAllocation continues to own
+        /// the chunk of native memory and should be alive until the end of execution.
+        /// The size of the allocation can not be less than required by the Tensor of the given size.
+        /// </summary>
+        /// <param name="name">name of the input</param>
+        /// <param name="elementType">Tensor element type</param>
+        /// <param name="shape"></param>
+        /// <param name="allocation">native memory allocation</param>
+        public void BindInput(string name, Tensors.TensorElementType elementType, long[] shape, OrtMemoryAllocation allocation)
+        {
+            using (var ortValue = OrtValue.CreateTensorValueWithData(allocation.Info,
+                                                                    elementType,
+                                                                    shape,
+                                                                    allocation.Pointer, allocation.Size))
+                BindInputOrOutput(name, ortValue.Handle, true);
+        }
+
+        /// <summary>
+        /// Bind the input with the given name as an OrtValue Tensor allocated in pinned managed memory.
+        /// The instance of FixedBufferOnnxValue owns the memory and should be alive until the end of execution.
+        /// </summary>
+        /// <param name="name">name of input</param>
+        /// <param name="fixedValue"></param>
+        public void BindInput(string name, FixedBufferOnnxValue fixedValue)
+        {
+            if(fixedValue.OnnxValueType != OnnxValueType.ONNX_TYPE_TENSOR)
+            {
+                throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Binding works only with Tensors");
+            }
+            BindInputOrOutput(name, fixedValue.Value, true);
+        }
+
+        /// <summary>
+        /// Bind a model output to an OrtValue as a Tensor with a given type and shape. An instance of OrtMemoryAllocation
+        /// owns the memory and should be alive for the time of execution. The size of the allocation can not be less than required
+        /// by the Tensor of the given size.
+        /// </summary>
+        /// <param name="name">name of the output</param>
+        /// <param name="elementType">tensor element type</param>
+        /// <param name="shape">tensor shape</param>
+        /// <param name="allocation">allocated memory</param>
+        public void BindOutput(string name, Tensors.TensorElementType elementType, long[] shape, OrtMemoryAllocation allocation)
+        {
+            using (var ortValue = OrtValue.CreateTensorValueWithData(allocation.Info,
+                                                                    elementType,
+                                                                    shape,
+                                                                    allocation.Pointer, allocation.Size))
+                BindInputOrOutput(name, ortValue.Handle, false);
+        }
+
+        /// <summary>
+        /// Bind a model output to a given instance of FixedBufferOnnxValue which owns the underlying
+        /// pinned managed memory and should be alive for the time of execution.
+        /// </summary>
+        /// <param name="name">name of the output</param>
+        /// <param name="fixedValue"></param>
+        public void BindOutput(string name, FixedBufferOnnxValue fixedValue)
+        {
+            if (fixedValue.OnnxValueType != OnnxValueType.ONNX_TYPE_TENSOR)
+            {
+                throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Binding works only with Tensors");
+            }
+            BindInputOrOutput(name, fixedValue.Value, false);
+        }
+
+        /// <summary>
+        /// This function will bind the model output with the given name to a device
+        /// specified by the memInfo.
+        /// </summary>
+        /// <param name="name">output name</param>
+        /// <param name="memInfo">instance of memory info</param>
+        public void BindOutputToDevice(string name, OrtMemoryInfo memInfo)
+        {
+            var utf8NamePinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(name), GCHandleType.Pinned);
+            using (var pinnedName = new PinnedGCHandle(utf8NamePinned))
+                NativeApiStatus.VerifySuccess(NativeMethods.OrtBindOutputToDevice(_handle, pinnedName.Pointer, memInfo.Pointer));
+        }
+
+        /// <summary>
+        /// Internal helper
+        /// </summary>
+        /// <param name="name"></param>
+        /// <param name="ortValue"></param>
+        /// <param name="isInput"></param>
+        private void BindInputOrOutput(string name, IntPtr ortValue, bool isInput)
+        {
+            var utf8NamePinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(name), GCHandleType.Pinned);
+            using (var pinnedName = new PinnedGCHandle(utf8NamePinned))
+            {
+                if (isInput)
+                {
+                    NativeApiStatus.VerifySuccess(NativeMethods.OrtBindInput(_handle, pinnedName.Pointer, ortValue));
+                }
+                else
+                {
+                    NativeApiStatus.VerifySuccess(NativeMethods.OrtBindOutput(_handle, pinnedName.Pointer, ortValue));
+                }
+            }
+        }
+
+        /// <summary>
+        /// Returns an array of output names in the same order they were bound
+        /// </summary>
+        /// <returns>array of output names</returns>
+        public string[] GetOutputNames()
+        {
+            IntPtr buffer = IntPtr.Zero;
+            IntPtr lengths = IntPtr.Zero;
+            UIntPtr count = UIntPtr.Zero;
+            var allocator = OrtAllocator.DefaultInstance;
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtGetBoundOutputNames(_handle, allocator.Pointer, out buffer, out lengths, out count));
+
+            if(count.Equals(UIntPtr.Zero))
+            {
+                return new string[0];
+            }
+
+            using (var bufferAllocation = new OrtMemoryAllocation(allocator, buffer, 0))
+            using (var lengthsAllocation = new OrtMemoryAllocation(allocator, lengths, 0))
+            {
+                uint outputCount = (uint)count;
+                string[] result = new string[outputCount];
+                int readOffset = 0;
+                for(int i = 0; i < outputCount; ++i)
+                {
+                    // strLen in bytes
+                    int strLen = (int)Marshal.ReadIntPtr(lengths, IntPtr.Size * i);
+                    result[i] = NativeOnnxValueHelper.StringFromNativeUtf8(buffer, readOffset, strLen);
+                    readOffset += strLen;
+                }
+                return result;
+            }
+        }
+
+        /// <summary>
+        /// This fetches bound outputs after running the model with RunWithBinding()
+        /// </summary>
+        /// <returns>IDisposableReadOnlyCollection<OrtValue> if no outputs returns null</returns>
+        public IDisposableReadOnlyCollection<OrtValue> GetOutputValues()
+        {
+            IntPtr ortValues = IntPtr.Zero;
+            UIntPtr count = UIntPtr.Zero;
+            var allocator = OrtAllocator.DefaultInstance;
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtGetBoundOutputValues(_handle, allocator.Pointer, out ortValues, out count));
+
+            if(count.Equals(UIntPtr.Zero))
+            {
+                return null;
+            }
+
+            using(var ortValuesAllocation = new OrtMemoryAllocation(allocator, ortValues, 0))
+            {
+                int outputCount = (int)count;
+                var ortList = new DisposableList<OrtValue>(outputCount);
+                try
+                {
+                    for(int i = 0; i < outputCount; ++i)
+                    {
+                        IntPtr ortValue = Marshal.ReadIntPtr(ortValues, IntPtr.Size * i);
+                        ortList.Add(new OrtValue(ortValue));
+                    }
+                } catch(Exception e)
+                {
+                    ortList.Dispose();
+                    throw e;
+                }
+                return ortList;
+            }
+        }
+
+        /// <summary>
+        /// Clear all bound inputs and start anew
+        /// </summary>
+        public void ClearBoundInputs()
+        {
+            NativeMethods.OrtClearBoundInputs(_handle);
+        }
+
+        /// <summary>
+        /// Clear all bound outputs
+        /// </summary>
+        public void ClearBoundOutputs()
+        {
+            NativeMethods.OrtClearBoundOutputs(_handle);
+        }
+
+        #region Disposable Support
+        protected virtual void Dispose(bool disposing)
+        {
+            if(disposing)
+            {
+                NativeMethods.OrtReleaseIoBinding(_handle);
+                _handle = IntPtr.Zero;
+            }
+        }
+        public void Dispose()
+        {
+            Dispose(true);
+            GC.SuppressFinalize(this);
+        }
+
+        // No need for the finalizer
+        #endregion
+    }
+}
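For reference (not part of the patch), the native ownership contract that the managed GetOutputValues() above wraps can also be exercised directly through the C API; a minimal C++ sketch, assuming hypothetical pre-existing handles g_ort (the OrtApi table), binding, and allocator:

    // Sketch: consume GetBoundOutputValues at the C API level.
    // Each OrtValue* is released individually; the array itself is freed with the allocator.
    OrtValue** values = nullptr;
    size_t count = 0;
    OrtStatus* status = g_ort->GetBoundOutputValues(binding, allocator, &values, &count);
    if (status == nullptr && values != nullptr) {
      for (size_t i = 0; i != count; ++i) {
        // ... read values[i] ...
        g_ort->ReleaseValue(values[i]);
      }
      allocator->Free(allocator, values);
    }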
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs index e28be06f533fe..396fff9c1da87 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs @@ -1,7 +1,8 @@ -using Microsoft.ML.OnnxRuntime.Tensors; +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +using Microsoft.ML.OnnxRuntime.Tensors; using System; -using System.Collections.Generic; -using System.Text; namespace Microsoft.ML.OnnxRuntime { @@ -22,11 +23,10 @@ internal OrtValue(IntPtr handle) internal IntPtr Handle { get; private set; } /// - /// This internal interface is used to transfer ownership to - /// DisposableNamdOnnxValue class. + /// This internal interface is used to transfer ownership elsewhere. /// /// - internal IntPtr TakeOwnership() + internal IntPtr Disown() { var handle = Handle; Handle = IntPtr.Zero; @@ -53,8 +53,16 @@ public static OrtValue CreateTensorValueWithData(OrtMemoryInfo memInfo, TensorEl IntPtr dataBuffer, uint bufferLength) { + Type type; + int width; + TensorElementTypeConverter.GetTypeAndWidth(elementType, out type, out width); + if(width == 0) + { + throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Unknown tensor type"); + } + var shapeSize = ArrayUtilities.GetSizeForShape(shape); - if(shapeSize > bufferLength) + if((shapeSize * width) > bufferLength) { throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Can not bind the shape to smaller buffer"); } From 61b48f8a0040ee78d60df58cf1d0cf9c96152393 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 28 Jul 2020 15:03:34 -0700 Subject: [PATCH 21/39] Complete testing. --- .../DisposableNamedOnnxValue.cs | 18 +- .../FixedBufferOnnxValue.cs | 5 - .../InferenceSession.cs | 48 ++-- .../Microsoft.ML.OnnxRuntime/OrtIoBinding.cs | 6 +- .../InferenceTest.cs | 207 +++++++++++++----- 5 files changed, 194 insertions(+), 90 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs index cb3f01ce89b50..38a4d3157b8e9 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs @@ -30,27 +30,23 @@ protected virtual void Dispose(bool disposing) { if (disposing) { - // TODO: dispose managed state (managed objects). - for (int i = 0; i < this.Count; i++) + // Dispose in the reverse order. + // Objects should typically be destroyed/disposed + // in the reverse order of its creation + // especially if the objects created later refer to the + // objects created earlier. For homogeneous collections of objects + // it would not matter. + for (int i = this.Count - 1; i >= 0; --i) { this[i]?.Dispose(); } this.Clear(); } - // TODO: free unmanaged resources (unmanaged objects) and override a finalizer below. - // TODO: set large fields to null. - disposedValue = true; } } - ~DisposableList() - { - // Do not change this code. Put cleanup code in Dispose(bool disposing) above. - Dispose(false); - } - // This code added to correctly implement the disposable pattern. 
        public void Dispose()
        {
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs
index 206e87d035ba7..3745e8042633b 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs
@@ -63,11 +63,6 @@ protected virtual void Dispose(bool disposing)
            }
        }

-       ~FixedBufferOnnxValue()
-       {
-           Dispose(false);
-       }
-
        public void Dispose()
        {
            Dispose(true);
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
index d9ded19551c36..dde150e927477 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
@@ -695,25 +695,44 @@ public void Run(
            }
        }

-       public OrtIoBinding CreateIOBinding()
+       /// <summary>
+       /// Create an OrtIoBinding instance to bind pre-allocated buffers
+       /// to inputs/outputs
+       /// </summary>
+       /// <returns></returns>
+       public OrtIoBinding CreateIoBinding()
        {
            return new OrtIoBinding(this);
        }

        /// <summary>
-       /// Make this method return a collection of DisposableNamedOnnxValue as in other interfaces
+       /// This method runs inference on the OrtIoBinding instance.
+       /// The method does not return anything. This is a lightweight version of
+       /// RunWithBindingAndNames(). When you bind pre-allocated buffers to the output values
+       /// you may not want to fetch the outputs since you already have access to them, so you can spare
+       /// the expense of fetching them and pairing them with names.
+       /// You can still fetch the outputs by calling OrtIoBinding.GetOutputValues()
+       /// </summary>
+       /// <param name="runOptions"></param>
+       /// <param name="ioBinding"></param>
+       public void RunWithBinding(RunOptions runOptions, OrtIoBinding ioBinding)
+       {
+           NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle));
+       }
+
+       /// <summary>
+       /// This method returns a collection of DisposableNamedOnnxValue as in other interfaces.
        /// Query names from OrtIoBinding object and pair them with the array of OrtValues returned
-       ///
-       /// This method will run inference and will return outputs with names for the outputs
-       /// previously bound to ioBinding instance.
+       /// from OrtIoBinding.GetOutputValues()
+       /// </summary>
        /// <param name="runOptions">RunOptions</param>
        /// <param name="ioBinding">OrtIoBinding instance with bindings</param>
        /// <param name="names">optional parameter. If you already know the names of the outputs you can save a native
-       /// call to retrieve parameter names. They will be paired with the returned OrtValues and combined into DisposableNamedOnnxValues.
+       /// call to retrieve output names. They will be paired with the returned OrtValues and combined into DisposableNamedOnnxValues.
        /// Otherwise, the method will retrieve output names from the OrtIoBinding instance.
        /// It is an error if you supply a different number of names than the returned outputs</param>
-       public IDisposableReadOnlyCollection<DisposableNamedOnnxValue> Run(RunOptions runOptions, OrtIoBinding ioBinding, string[] names = null)
+       public IDisposableReadOnlyCollection<DisposableNamedOnnxValue> RunWithBindingAndNames(RunOptions runOptions, OrtIoBinding ioBinding, string[] names = null)
        {
            NativeApiStatus.VerifySuccess(NativeMethods.OrtRunWithBinding(Handle, runOptions.Handle, ioBinding.Handle));
            using (var ortValues = ioBinding.GetOutputValues())
@@ -726,14 +745,9 @@ public IDisposableReadOnlyCollection<DisposableNamedOnnxValue> Run(RunOptions ru
                if (outputNames.Length != ortValues.Count)
                {
-                   if (names != null)
-                   {
-                       throw new OnnxRuntimeException(ErrorCode.InvalidArgument,
-                           "Number of specified names: " + names.Length + " does not match the output number: " +
-                           ortValues.Count);
-                   }
-                   throw new OnnxRuntimeException(ErrorCode.EngineError,
-                       "BUG check. Number of fetched output names does not match number of outputs");
+                   throw new OnnxRuntimeException(ErrorCode.InvalidArgument,
+                       "Number of specified names: " + names.Length + " does not match the output number: " +
+                       ortValues.Count);
                }

                var result = new DisposableList<DisposableNamedOnnxValue>(outputNames.Length);
@@ -741,8 +755,10 @@ public IDisposableReadOnlyCollection<DisposableNamedOnnxValue> Run(RunOptions ru
                {
                    for (int i = 0; i < outputNames.Length; ++i)
                    {
-                       var ortValue = ortValues.ElementAt(i);
+                       var ortValue = ortValues.ElementAt(i);
                        result.Add(DisposableNamedOnnxValue.CreateTensorFromOnnxValue(outputNames[i], ortValue.Handle));
+                       // We transferred ownership of the handle.
+                       // Make sure it is not disposed here
                        ortValue.Disown();
                    }
                } catch(Exception e)
diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs
index 7626ceb78fc89..a421fbc84d8f6 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs
@@ -18,7 +18,7 @@ public class OrtIoBinding : IDisposable
        private IntPtr _handle;

        /// <summary>
-       /// Use InferenceSession.CreateIOBinding()
+       /// Use InferenceSession.CreateIoBinding()
        /// </summary>
        /// <param name="session"></param>
        internal OrtIoBinding(InferenceSession session)
@@ -175,7 +175,7 @@ public string[] GetOutputNames()
        /// <summary>
        /// This fetches bound outputs after running the model with RunWithBinding()
        /// </summary>
-       /// <returns>IDisposableReadOnlyCollection<OrtValue> if no outputs returns null</returns>
+       /// <returns>IDisposableReadOnlyCollection<OrtValue></returns>
        public IDisposableReadOnlyCollection<OrtValue> GetOutputValues()
        {
            IntPtr ortValues = IntPtr.Zero;
@@ -185,7 +185,7 @@ public IDisposableReadOnlyCollection<OrtValue> GetOutputValues()

            if(count.Equals(UIntPtr.Zero))
            {
-               return null;
+               return new DisposableList<OrtValue>();
            }

            using(var ortValuesAllocation = new OrtMemoryAllocation(allocator, ortValues, 0))
diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
index 3ae109c1acfa6..aac35661c107a 100644
--- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
+++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs
@@ -1,12 +1,12 @@
 // Copyright (c) Microsoft Corporation. All rights reserved.
 // Licensed under the MIT License.
+using Microsoft.ML.OnnxRuntime.Tensors; using System; -using System.IO; using System.Collections.Generic; +using System.IO; using System.Linq; using System.Runtime.InteropServices; -using Microsoft.ML.OnnxRuntime.Tensors; using System.Threading.Tasks; using Xunit; using Xunit.Abstractions; @@ -1719,7 +1719,6 @@ void TestCUDAAllocator(InferenceSession session) private void TestAllocator() { string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); - // Set the optimized model file path to assert that no exception are thrown. using (SessionOptions options = new SessionOptions()) { options.AppendExecutionProvider_CPU(1); @@ -1736,6 +1735,136 @@ private void TestAllocator() } } + /// + /// This works only for allocations accessible from host memory + /// + /// + /// + private static void PopulateNativeBufferFloat(OrtMemoryAllocation buffer, float[] elements) + { + if(buffer.Size < elements.Length * sizeof(float)) + { + Assert.True(false); + } + + unsafe + { + float* p = (float*)buffer.Pointer; + for (int i = 0; i < elements.Length; ++i) + { + *p++ = elements[i]; + } + } + } + + private static void CompareNativeBufferFloat(OrtMemoryAllocation buffer, float[] elements, IEqualityComparer comp) + { + if (buffer.Size != elements.Length * sizeof(float)) + { + Assert.True(false); + } + + unsafe + { + float* p = (float*)buffer.Pointer; + for (int i = 0; i < elements.Length; ++i) + { + Assert.True(comp.Equals(*p++, elements[i])); + } + } + } + + [Fact] + private void TestBinding() + { + var inputName = "data_0"; + var outputName = "softmaxout_1"; + var allocator = OrtAllocator.DefaultInstance; + // From the model + using (var dispList = new DisposableList()) + { + var tuple = OpenSessionSqueezeNet(); + var session = tuple.Item1; + var inputData = tuple.Item2; + var inputTensor = tuple.Item3; + var outputData = tuple.Item4; + dispList.Add(session); + var outputMeta = session.OutputMetadata; + var outputTensor = new DenseTensor(outputData, outputMeta[outputName].Dimensions); + + // var inputs = new List { NamedOnnxValue.CreateFromTensor("data_0", inputTensor) }; + var ioBinding = session.CreateIoBinding(); + dispList.Add(ioBinding); + + var ortAllocationInput = allocator.Allocate((uint)inputData.Length * sizeof(float)); + dispList.Add(ortAllocationInput); + PopulateNativeBufferFloat(ortAllocationInput, inputData); + + var ortAllocationOutput = allocator.Allocate((uint)outputData.Length * sizeof(float)); + dispList.Add(ortAllocationOutput); + + // Test 1. Bind input to fixed, Bind Output to Fixed. + using (FixedBufferOnnxValue fixeInputBuffer = FixedBufferOnnxValue.CreateFromTensor(inputTensor), + fixedOutputBuffer = FixedBufferOnnxValue.CreateFromTensor(outputTensor)) + { + ioBinding.BindInput(inputName, fixeInputBuffer); + ioBinding.BindOutput(outputName, fixedOutputBuffer); + using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding)) + { + Assert.Equal(1, outputs.Count); + var output = outputs.First(); + Assert.Equal(outputName, output.Name); + var tensor = output.AsTensor(); + Assert.True(tensor.IsFixedSize); + Assert.Equal(outputData, tensor.ToArray(), new floatComparer()); + } + } + + //Test 2. 
Use the same input as in Test 1 + // but rebind the output to OrtAllocated buffer + using (FixedBufferOnnxValue fixedInputBuffer = FixedBufferOnnxValue.CreateFromTensor(inputTensor)) + { + ioBinding.BindInput(inputName, fixedInputBuffer); + var longShape = Array.ConvertAll(outputMeta[outputName].Dimensions, i => i); + ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput); + using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding)) + { + Assert.Equal(1, outputs.Count); + var output = outputs.First(); + Assert.Equal(outputName, output.Name); + var tensor = output.AsTensor(); + Assert.True(tensor.IsFixedSize); + Assert.Equal(outputData, tensor.ToArray(), new floatComparer()); + + // Let's check that the output buffer actually contains the data + CompareNativeBufferFloat(ortAllocationOutput, outputData, new floatComparer()); + } + } + + // Test 3. Bind input to preallocated buffer. Output to a device so the allocation would happen + // automatically + using (FixedBufferOnnxValue fixedInputBuffer = FixedBufferOnnxValue.CreateFromTensor(inputTensor)) + { + ioBinding.BindInput(inputName, fixedInputBuffer); + ioBinding.BindOutputToDevice(outputName, allocator.Info); + + using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding)) + { + Assert.Equal(1, outputs.Count); + var output = outputs.First(); + Assert.Equal(outputName, output.Name); + var tensor = output.AsTensor(); + Assert.True(tensor.IsFixedSize); + Assert.Equal(outputData, tensor.ToArray(), new floatComparer()); + } + } + + // Rebinding would happen without these but we want run them. + ioBinding.ClearBoundInputs(); + ioBinding.ClearBoundOutputs(); + } + } + [DllImport("kernel32", SetLastError = true)] static extern IntPtr LoadLibrary(string lpFileName); @@ -2032,21 +2161,29 @@ static NamedOnnxValue CreateNamedOnnxValueFromRawData(string name, byte[] raw static Tuple, float[]> OpenSessionSqueezeNet(int? cudaDeviceId = null) { string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); - var option = new SessionOptions(); #if USE_CUDA - if (cudaDeviceId.HasValue) + using (var option = (cudaDeviceId.HasValue) ? + SessionOptions.MakeSessionOptionWithCudaProvider(cudaDeviceId.Value) : + new SessionOptions()) { - option = SessionOptions.MakeSessionOptionWithCudaProvider(cudaDeviceId.Value); - } + if(!cudaDeviceId.HasValue) + { + option.AppendExecutionProvider_CPU(1); + } +#else + using (var option = new SessionOptions()) + { + option.AppendExecutionProvider_CPU(1); #endif - var session = (cudaDeviceId.HasValue) - ? new InferenceSession(modelPath, option) - : new InferenceSession(modelPath); - float[] inputData = LoadTensorFromFile(@"bench.in"); - float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out"); - var inputMeta = session.InputMetadata; - var tensor = new DenseTensor(inputData, inputMeta["data_0"].Dimensions); - return new Tuple, float[]>(session, inputData, tensor, expectedOutput); + var session = (cudaDeviceId.HasValue) + ? 
new InferenceSession(modelPath, option) + : new InferenceSession(modelPath); + float[] inputData = LoadTensorFromFile(@"bench.in"); + float[] expectedOutput = LoadTensorFromFile(@"bench.expected_out"); + var inputMeta = session.InputMetadata; + var tensor = new DenseTensor(inputData, inputMeta["data_0"].Dimensions); + return new Tuple, float[]>(session, inputData, tensor, expectedOutput); + } } class floatComparer : IEqualityComparer @@ -2102,44 +2239,4 @@ public SkipNonPackageTests() } } - - // A Disposable list is a list of IDisposable objects. All elements will be disposed when the container is disposed. - internal class DisposableList : List, IDisposableReadOnlyCollection - where T : IDisposable - { - public DisposableList() { } - public DisposableList(int count) : base(count) { } - - #region IDisposable Support - private bool disposedValue = false; // To detect redundant calls - - protected virtual void Dispose(bool disposing) - { - if (!disposedValue) - { - if (disposing) - { - for (int i = 0; i < this.Count; i++) - { - this[i]?.Dispose(); - } - this.Clear(); - } - - disposedValue = true; - } - } - - ~DisposableList() - { - Dispose(false); - } - - public void Dispose() - { - Dispose(true); - GC.SuppressFinalize(this); - } - #endregion - } } From 764d4017ff508ddefed1fc39cf7c5801b87729d1 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 28 Jul 2020 16:47:41 -0700 Subject: [PATCH 22/39] Fix build failures, fix some leaks. --- .../DisposableNamedOnnxValue.cs | 38 +++++++++---------- .../InferenceSession.cs | 6 ++- .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 4 +- .../NativeOnnxTensorMemory.cs | 5 ++- .../core/session/onnxruntime_cxx_api.h | 4 +- .../core/session/onnxruntime_cxx_inline.h | 12 +++--- onnxruntime/test/shared_lib/test_inference.cc | 2 +- 7 files changed, 35 insertions(+), 36 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs index 38a4d3157b8e9..73143653f3386 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs @@ -112,22 +112,17 @@ internal static DisposableNamedOnnxValue CreateTensorFromOnnxValue(string name, /* Get Tensor element type */ //TODO: Assumed value is Tensor, need to support non-tensor types in future IntPtr typeAndShape = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorTypeAndShape(nativeOnnxValue, out typeAndShape)); TensorElementType elemType = TensorElementType.DataTypeMax; try { - NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorTypeAndShape(nativeOnnxValue, out typeAndShape)); - unsafe - { - NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorElementType(typeAndShape, new IntPtr(&elemType))); - } - + IntPtr el_type; + NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorElementType(typeAndShape, out el_type)); + elemType = (TensorElementType)el_type; } finally { - if (typeAndShape != IntPtr.Zero) - { - NativeMethods.OrtReleaseTensorTypeAndShapeInfo(typeAndShape); - } + NativeMethods.OrtReleaseTensorTypeAndShapeInfo(typeAndShape); } switch (elemType) @@ -183,11 +178,9 @@ internal static DisposableNamedOnnxValue CreateFromOnnxValue(string name, IntPtr internal static DisposableNamedOnnxValue CreateFromOnnxValue(string name, IntPtr nativeOnnxValue, OrtAllocator allocator) { - OnnxValueType onnxValueType; - unsafe - { - NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValueType(nativeOnnxValue, new 
IntPtr(&onnxValueType))); - } + IntPtr valueType; + NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValueType(nativeOnnxValue, out valueType)); + OnnxValueType onnxValueType = (OnnxValueType)valueType; switch (onnxValueType) { case OnnxValueType.ONNX_TYPE_TENSOR: @@ -206,22 +199,25 @@ internal static DisposableNamedOnnxValue CreateFromOnnxValue(string name, IntPtr return new DisposableNamedOnnxValue(name, sequence, OnnxValueType.ONNX_TYPE_SEQUENCE, TensorElementType.DataTypeMax, null); case OnnxValueType.ONNX_TYPE_MAP: - IntPtr typeAndShape = IntPtr.Zero; IntPtr nativeOnnxValueMapKeys = IntPtr.Zero; IntPtr nativeOnnxValueMapValues = IntPtr.Zero; - TensorElementType elemType = TensorElementType.DataTypeMax; NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValue(nativeOnnxValue, 0, allocator.Pointer, out nativeOnnxValueMapKeys)); NativeApiStatus.VerifySuccess(NativeMethods.OrtGetValue(nativeOnnxValue, 1, allocator.Pointer, out nativeOnnxValueMapValues)); - NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorTypeAndShape(nativeOnnxValueMapKeys, out typeAndShape)); - unsafe + IntPtr typeAndShape = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorTypeAndShape(nativeOnnxValueMapKeys, out typeAndShape)); + TensorElementType elemType = TensorElementType.DataTypeMax; + try { - NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorElementType(typeAndShape, new IntPtr(&elemType))); + IntPtr el_type; + NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorElementType(typeAndShape, out el_type)); + elemType = (TensorElementType)el_type; } - if (typeAndShape != IntPtr.Zero) + finally { NativeMethods.OrtReleaseTensorTypeAndShapeInfo(typeAndShape); } + switch (elemType) { case TensorElementType.Int64: diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs index dde150e927477..7d4f625878feb 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs @@ -1018,6 +1018,7 @@ internal static NodeMetadata GetMetadataFromTypeInfo(IntPtr typeInfo) return new NodeMetadata(valueType, new int[] { }, new string[] { }, typeof(NamedOnnxValue)); } + // This should not be released IntPtr tensorInfo; NativeApiStatus.VerifySuccess(NativeMethods.OrtCastTypeInfoToTensorInfo(typeInfo, out tensorInfo)); //(IntPtr)(int)(uint) // Convert the newly introduced OrtTypeInfo* to the older OrtTypeAndShapeInfo* @@ -1026,9 +1027,10 @@ internal static NodeMetadata GetMetadataFromTypeInfo(IntPtr typeInfo) return null; TensorElementType type; - unsafe { - NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorElementType(tensorInfo, new IntPtr(&type))); + IntPtr el_type; + NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorElementType(tensorInfo, out el_type)); + type = (TensorElementType)el_type; } Type dotnetType = null; int width = 0; diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index 332d7676d01d8..533656eb07ff1 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -762,7 +762,7 @@ IntPtr[] outputValues /* An array of output value pointers. 
Array must be alloca out IntPtr /*(OrtValue**)*/ outputValue); public static DOrtGetValue OrtGetValue; - public delegate IntPtr /*(OrtStatus*)*/ DOrtGetValueType(IntPtr /*(OrtValue*)*/ value, IntPtr /*(OnnxValueType*)*/ onnxtype); + public delegate IntPtr /*(OrtStatus*)*/ DOrtGetValueType(IntPtr /*(OrtValue*)*/ value, out IntPtr /*(OnnxValueType*)*/ onnxtype); public static DOrtGetValueType OrtGetValueType; public delegate IntPtr /*(OrtStatus*)*/ DOrtGetOnnxTypeFromTypeInfo(IntPtr /*(OrtTypeInfo*)*/ typeinfo, IntPtr /*(OnnxValueType*)*/ onnxtype); @@ -828,7 +828,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca public delegate void DOrtReleaseTensorTypeAndShapeInfo(IntPtr /*(OrtTensorTypeAndShapeInfo*)*/ value); public static DOrtReleaseTensorTypeAndShapeInfo OrtReleaseTensorTypeAndShapeInfo; - public delegate IntPtr /*(OrtStatus*)*/ DOrtGetTensorElementType(IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo, IntPtr /*(TensorElementType*)*/ output); + public delegate IntPtr /*(OrtStatus*)*/ DOrtGetTensorElementType(IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo, out IntPtr /*(TensorElementType*)*/ output); public static DOrtGetTensorElementType OrtGetTensorElementType; public delegate IntPtr /*(OrtStatus*)*/ DOrtGetDimensionsCount(IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo, out UIntPtr output); diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs index 240717b915f43..6819b4dffe528 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs @@ -43,9 +43,10 @@ public NativeOnnxTensorMemory(IntPtr onnxValueHandle) NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorTypeAndShape(onnxValueHandle, out typeAndShape)); TensorElementType elemType; - unsafe { - NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorElementType(typeAndShape, new IntPtr(&elemType))); + IntPtr el_type; + NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorElementType(typeAndShape, out el_type)); + elemType = (TensorElementType)el_type; } TensorElementTypeConverter.GetTypeAndWidth(elemType, out type, out width); diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 2b5cbc19a699c..17ca8e4a8fac8 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -343,8 +343,8 @@ struct Allocator : public Base { struct IoBinding : public Base { private: - std::vector GetOutputNames(OrtAllocator*) const; - std::vector GetOutputValues(OrtAllocator*) const; + std::vector GetOutputNamesHelper(OrtAllocator*) const; + std::vector GetOutputValuesHelper(OrtAllocator*) const; public: explicit IoBinding(Session& session); void BindInput(const char* name, const Value&); diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index 604ed426d5b3c..f7465007ee30d 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -115,7 +115,7 @@ inline void IoBinding::BindOutput(const char* name, const MemoryInfo& mem_info) ThrowOnError(GetApi().BindOutputToDevice(p_, name, mem_info)); } -inline std::vector IoBinding::GetOutputNames(OrtAllocator* allocator) const { +inline 
std::vector IoBinding::GetOutputNamesHelper(OrtAllocator* allocator) const { std::vector result; auto free_fn = [allocator](void* p) { if (p) allocator->Free(allocator, p); }; using Ptr = std::unique_ptr; @@ -144,14 +144,14 @@ inline std::vector IoBinding::GetOutputNames(OrtAllocator* allocato inline std::vector IoBinding::GetOutputNames() const { AllocatorWithDefaultOptions allocator; - return GetOutputNames(allocator); + return GetOutputNamesHelper(allocator); } inline std::vector IoBinding::GetOutputNames(Allocator& allocator) const { - return GetOutputNames(allocator); + return GetOutputNamesHelper(allocator); } -inline std::vector Ort::IoBinding::GetOutputValues(OrtAllocator* allocator) const { +inline std::vector Ort::IoBinding::GetOutputValuesHelper(OrtAllocator* allocator) const { std::vector result; size_t owned = 0; size_t output_count = 0; @@ -185,12 +185,12 @@ inline std::vector Ort::IoBinding::GetOutputValues(OrtAllocator* allocato } inline std::vector Ort::IoBinding::GetOutputValues(Allocator& allocator) const { - return GetOutputValues(allocator); + return GetOutputValuesHelper(allocator); } inline std::vector Ort::IoBinding::GetOutputValues() const { AllocatorWithDefaultOptions allocator; - return GetOutputValues(allocator); + return GetOutputValuesHelper(allocator); } inline void IoBinding::ClearBoundInputs() { diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index cdc8b238f41b7..bf41042b5616a 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -426,7 +426,7 @@ TEST(CApiTest, get_allocator_cuda) { TEST(CApiTest, io_binding) { Ort::SessionOptions session_options; - OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1); + Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CPU(session_options, 1)); Ort::Session session(*ort_env, MODEL_URI, session_options); Ort::MemoryInfo info_cpu = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemTypeDefault); From 224e7e711b857fd03e4ff87229939b1a8be87670 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 28 Jul 2020 17:05:24 -0700 Subject: [PATCH 23/39] Adjust some protos. 
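Switch the P/Invoke delegate prototypes for native out-parameters from a
plain IntPtr, which callers had to fill via new IntPtr(&local) inside
unsafe blocks, to out IntPtr with a cast to the enum type at the call
site. This matches the C API, which reports results through out
pointers. A minimal C++ sketch of the native convention the delegates
now mirror (assumes type_and_shape is a valid
const OrtTensorTypeAndShapeInfo*; error handling elided, illustrative
only, not part of this change):

    ONNXTensorElementDataType elem_type;
    Ort::ThrowOnError(Ort::GetApi().GetTensorElementType(type_and_shape, &elem_type));

    size_t element_count = 0;
    Ort::ThrowOnError(Ort::GetApi().GetTensorShapeElementCount(type_and_shape, &element_count));

Also read symbolic dimension names as UTF-8 instead of assuming an ANSI
charset.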
--- .../InferenceSession.cs | 7 ++-- .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 4 +-- .../NativeOnnxTensorMemory.cs | 32 ++++++------------- 3 files changed, 15 insertions(+), 28 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs index 7d4f625878feb..82dc52e3a16d4 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs @@ -1009,9 +1009,10 @@ private NodeMetadata GetOverridableInitializerMetadata(ulong index) internal static NodeMetadata GetMetadataFromTypeInfo(IntPtr typeInfo) { OnnxValueType valueType; - unsafe { - NativeApiStatus.VerifySuccess(NativeMethods.OrtGetOnnxTypeFromTypeInfo(typeInfo, new IntPtr(&valueType))); + IntPtr valType; + NativeApiStatus.VerifySuccess(NativeMethods.OrtGetOnnxTypeFromTypeInfo(typeInfo, out valType)); + valueType = (OnnxValueType)valType; } if (valueType != OnnxValueType.ONNX_TYPE_TENSOR && valueType != OnnxValueType.ONNX_TYPE_SPARSETENSOR) { @@ -1053,7 +1054,7 @@ internal static NodeMetadata GetMetadataFromTypeInfo(IntPtr typeInfo) string[] symbolicDimensions = new string[(int)numDimensions]; for (var i = 0; i < (int)numDimensions; i++) { - symbolicDimensions[i] = Marshal.PtrToStringAnsi(dimensionNamePtrs[i]); //assumes charset = ANSI + symbolicDimensions[i] = NativeOnnxValueHelper.StringFromNativeUtf8(dimensionNamePtrs[i]); } return new NodeMetadata(valueType, intDimensions, symbolicDimensions, dotnetType); diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index 533656eb07ff1..a58e8ba6e157e 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -765,7 +765,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca public delegate IntPtr /*(OrtStatus*)*/ DOrtGetValueType(IntPtr /*(OrtValue*)*/ value, out IntPtr /*(OnnxValueType*)*/ onnxtype); public static DOrtGetValueType OrtGetValueType; - public delegate IntPtr /*(OrtStatus*)*/ DOrtGetOnnxTypeFromTypeInfo(IntPtr /*(OrtTypeInfo*)*/ typeinfo, IntPtr /*(OnnxValueType*)*/ onnxtype); + public delegate IntPtr /*(OrtStatus*)*/ DOrtGetOnnxTypeFromTypeInfo(IntPtr /*(OrtTypeInfo*)*/ typeinfo, out IntPtr /*(OnnxValueType*)*/ onnxtype); public static DOrtGetOnnxTypeFromTypeInfo OrtGetOnnxTypeFromTypeInfo; public delegate IntPtr /*(OrtStatus*)*/ DOrtGetValueCount(IntPtr /*(OrtValue*)*/ value, out IntPtr /*(size_t*)*/ count); @@ -866,7 +866,7 @@ IntPtr[] outputValues /* An array of output value pointers. 
Array must be alloca * [2,0,4] -> 0 * [-1,3,4] -> -1 */ - public delegate IntPtr /*(OrtStatus*)*/ DOrtGetTensorShapeElementCount(IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo, IntPtr /*(long*)*/ output); + public delegate IntPtr /*(OrtStatus*)*/ DOrtGetTensorShapeElementCount(IntPtr /*(const struct OrtTensorTypeAndShapeInfo*)*/ typeAndShapeInfo, out IntPtr /*(long*)*/ output); public static DOrtGetTensorShapeElementCount OrtGetTensorShapeElementCount; public delegate void DOrtReleaseValue(IntPtr /*(OrtValue*)*/ value); diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs index 6819b4dffe528..337dce6707da8 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs @@ -34,14 +34,13 @@ internal class NativeOnnxTensorMemory : MemoryManager, NativeMemoryHandler public NativeOnnxTensorMemory(IntPtr onnxValueHandle) { + Type type = null; + int width = 0; + _onnxValueHandle = onnxValueHandle; IntPtr typeAndShape = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorTypeAndShape(onnxValueHandle, out typeAndShape)); try { - Type type = null; - int width = 0; - _onnxValueHandle = onnxValueHandle; - - NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorTypeAndShape(onnxValueHandle, out typeAndShape)); TensorElementType elemType; { IntPtr el_type; @@ -58,9 +57,10 @@ public NativeOnnxTensorMemory(IntPtr onnxValueHandle) UIntPtr dimension; long count; NativeApiStatus.VerifySuccess(NativeMethods.OrtGetDimensionsCount(typeAndShape, out dimension)); - unsafe { - NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorShapeElementCount(typeAndShape, new IntPtr(&count))); // count can be negative. + IntPtr el_count; + NativeApiStatus.VerifySuccess(NativeMethods.OrtGetTensorShapeElementCount(typeAndShape, out el_count)); // count can be negative. + count = (long)el_count; } if (count < 0) { @@ -68,10 +68,7 @@ public NativeOnnxTensorMemory(IntPtr onnxValueHandle) } long[] shape = new long[dimension.ToUInt64()]; - unsafe - { - NativeApiStatus.VerifySuccess(NativeMethods.OrtGetDimensions(typeAndShape, shape, new UIntPtr(&dimension))); //Note: shape must be alive during the call - } + NativeApiStatus.VerifySuccess(NativeMethods.OrtGetDimensions(typeAndShape, shape, dimension)); //Note: shape must be alive during the call _elementCount = (int)count; _dimensions = new int[dimension.ToUInt64()]; @@ -116,20 +113,9 @@ public NativeOnnxTensorMemory(IntPtr onnxValueHandle) } } } - catch (Exception e) - { - //TODO: cleanup any partially created state - //Do not call ReleaseTensor here. If the constructor has thrown exception, - //then this NativeOnnxTensorWrapper is not created, so caller should take - //appropriate action to dispose - throw e; - } finally { - if (typeAndShape != IntPtr.Zero) - { - NativeMethods.OrtReleaseTensorTypeAndShapeInfo(typeAndShape); - } + NativeMethods.OrtReleaseTensorTypeAndShapeInfo(typeAndShape); } } From e011a010177d1cb8f1770a33abe7a3d5198ecef3 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 29 Jul 2020 11:22:41 -0700 Subject: [PATCH 24/39] Address some review comments. Fix CI build failures. 
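Review feedback: InferenceSession::GetAllocator now takes OrtMemoryInfo
by reference, ClearBoundInputs/ClearBoundOutputs are defined through the
ORT_API macro, the OrtMemoryAllocation constructor becomes public, the
new C# allocator classes get doc comments, and redundant finalizers are
removed. With this in place, requesting a device allocator from a
session through the C++ wrappers looks like the following sketch
(assumes session is a valid Ort::Session with the matching provider
registered; it mirrors the shared-lib test touched below):

    Ort::MemoryInfo info_cpu = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemTypeDefault);
    Ort::Allocator cpu_allocator(session, info_cpu);
    void* p = cpu_allocator.Alloc(1024);
    // ... use the buffer, e.g. for io-binding ...
    cpu_allocator.Free(p);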
--- .../DisposableNamedOnnxValue.cs | 9 +--- .../NamedOnnxValue.cs | 8 +--- .../NativeOnnxTensorMemory.cs | 7 +-- .../Microsoft.ML.OnnxRuntime/OrtAllocator.cs | 46 ++++++++++++++++--- .../session/default_cpu_allocator_c_api.cc | 4 +- onnxruntime/core/session/device_allocator.cc | 2 +- onnxruntime/core/session/inference_session.cc | 4 +- onnxruntime/core/session/inference_session.h | 4 +- onnxruntime/core/session/onnxruntime_c_api.cc | 4 +- onnxruntime/test/shared_lib/test_inference.cc | 6 +-- 10 files changed, 57 insertions(+), 37 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs index 73143653f3386..399d2bf33ff0a 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs @@ -1,11 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +using Microsoft.ML.OnnxRuntime.Tensors; using System; using System.Buffers; using System.Collections.Generic; -using Microsoft.ML.OnnxRuntime.Tensors; -using System.Runtime.InteropServices; namespace Microsoft.ML.OnnxRuntime @@ -306,12 +305,6 @@ protected virtual void Dispose(bool disposing) } } - ~DisposableNamedOnnxValue() - { - // Do not change this code. Put cleanup code in Dispose(bool disposing) above. - Dispose(false); - } - // This code added to correctly implement the disposable pattern. public void Dispose() { diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NamedOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NamedOnnxValue.cs index e504d37bed039..4bc269e1c8573 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NamedOnnxValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NamedOnnxValue.cs @@ -1,14 +1,10 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. -using System; -using System.Collections.Generic; -using System.Text; using Microsoft.ML.OnnxRuntime.Tensors; +using System; using System.Buffers; -using System.Collections; -using System.Diagnostics; -using System.Runtime.InteropServices; +using System.Collections.Generic; namespace Microsoft.ML.OnnxRuntime { diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs index 337dce6707da8..91dcd6a2aed5f 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs @@ -1,14 +1,11 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. 
+using Microsoft.ML.OnnxRuntime.Tensors; using System; -using System.Collections.Generic; -using System.Text; using System.Buffers; -using System.Runtime.CompilerServices; -using System.Runtime.InteropServices; +using System.Text; using System.Threading; -using Microsoft.ML.OnnxRuntime.Tensors; namespace Microsoft.ML.OnnxRuntime { diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs index b7efbc3a382d7..1e6fade95400b 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs @@ -51,6 +51,9 @@ private static OrtMemoryInfo CreateCpuMemoryInfo() return new OrtMemoryInfo(memoryInfo, true); } + /// + /// Default CPU based instance + /// public static OrtMemoryInfo DefaultInstance { get @@ -81,7 +84,7 @@ internal OrtMemoryInfo(IntPtr allocInfo, bool owned) } // Predefined utf8 encoded allocator names. Use them to construct an instance of - // OrtMemoryInfo + // OrtMemoryInfo to avoid UTF-16 to UTF-8 conversion public static readonly byte[] allocatorCPU = Encoding.UTF8.GetBytes("Cpu" + Char.MinValue); public static readonly byte[] allocatorCUDA = Encoding.UTF8.GetBytes("Cuda" + Char.MinValue); public static readonly byte[] allocatorCUDA_PINNED = Encoding.UTF8.GetBytes("CudaPinned" + Char.MinValue); @@ -121,6 +124,9 @@ public OrtMemoryInfo(string allocatorName, OrtAllocatorType allocatorType, int d { } + /// + /// Name of the allocator associated with the OrtMemoryInfo instance + /// public string Name { get @@ -131,6 +137,9 @@ public string Name } } + /// + /// Returns device ID + /// public int Id { get @@ -154,6 +163,10 @@ public OrtMemType GetMemoryType() return memoryType; } + /// + /// Returns allocator type + /// + /// public OrtAllocatorType GetAllocatorType() { OrtAllocatorType allocatorType = OrtAllocatorType.ArenaAllocator; @@ -215,14 +228,16 @@ public OrtMemoryAllocation(IntPtr pointer, uint size) } /// - /// This an instance with a piece of memory allocated - /// by onnxruntime OrtAllocator. The same allocator will be used for - /// for memory disposal. + /// This constructs an instance representing a native memory allocation. + /// Typically returned by OrtAllocator.Allocate(). However, some APIs return + /// natively allocated IntPtr using a specific allocator. It is a good practice + /// to wrap such memory into OrtMemoryAllocation for proper disposal. You can set + /// size to zero if it is not known; the size is not used for disposal. /// /// /// /// - internal OrtMemoryAllocation(OrtAllocator allocator, IntPtr pointer, uint size) + public OrtMemoryAllocation(OrtAllocator allocator, IntPtr pointer, uint size) { _allocator = allocator; Pointer = pointer; @@ -267,6 +282,11 @@ public void Dispose() #endregion } + /// + /// The class exposes the native internal allocator for Onnxruntime. + /// This allocator enables you to allocate memory from the internal + /// memory pools including device allocations. Useful for binding. 
+ /// public class OrtAllocator : IDisposable { private static readonly Lazy _defaultInstance = new Lazy(GetDefaultCpuAllocator); @@ -282,6 +302,9 @@ private static OrtAllocator GetDefaultCpuAllocator() return new OrtAllocator(allocator, false); } + /// + /// Default CPU allocator instance + /// public static OrtAllocator DefaultInstance // May throw exception in every access, if the constructor have thrown an exception { get @@ -309,12 +332,23 @@ internal OrtAllocator(IntPtr allocator, bool owned) this._owned = owned; } + /// + /// Creates an instance of OrtAllocator according to the specifications in OrtMemoryInfo. + /// The requested allocator should be available within the given session instance. This means + /// both that the native library was built with the specific allocator (for instance CUDA) and that the corresponding + /// provider was added to SessionOptions before instantiating the session object. + /// + /// + /// public OrtAllocator(InferenceSession session, OrtMemoryInfo memInfo) { NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateAllocator(session.Handle, memInfo.Pointer, out _pointer)); _owned = true; } + /// + /// OrtMemoryInfo instance owned by the allocator + /// public OrtMemoryInfo Info { get @@ -327,7 +361,7 @@ public OrtMemoryInfo Info } /// - /// Allocate native memory + /// Allocate native memory. Returns a disposable instance of OrtMemoryAllocation /// /// /// diff --git a/onnxruntime/core/session/default_cpu_allocator_c_api.cc b/onnxruntime/core/session/default_cpu_allocator_c_api.cc index 21abc0a77b5d8..81798f138c1fd 100644 --- a/onnxruntime/core/session/default_cpu_allocator_c_api.cc +++ b/onnxruntime/core/session/default_cpu_allocator_c_api.cc @@ -43,9 +43,9 @@ struct OrtDefaultAllocator : OrtAllocatorImpl { return cpu_memory_info; } + ORT_DISALLOW_COPY_AND_ASSIGNMENT(OrtDefaultAllocator); + private: - OrtDefaultAllocator(const OrtDefaultAllocator&) = delete; - OrtDefaultAllocator& operator=(const OrtDefaultAllocator&) = delete; OrtMemoryInfo* cpu_memory_info; }; diff --git a/onnxruntime/core/session/device_allocator.cc b/onnxruntime/core/session/device_allocator.cc index b45cd02c84782..61cae5a008135 100644 --- a/onnxruntime/core/session/device_allocator.cc +++ b/onnxruntime/core/session/device_allocator.cc @@ -48,7 +48,7 @@ struct OrtAllocatorForDevice : public OrtAllocator { ORT_API_STATUS_IMPL(OrtApis::CreateAllocator, const OrtSession* sess, const OrtMemoryInfo* mem_info, _Outptr_ OrtAllocator** out) { API_IMPL_BEGIN auto session = reinterpret_cast(sess); - auto allocator_ptr = session->GetAllocator(mem_info); + auto allocator_ptr = session->GetAllocator(*mem_info); if (!allocator_ptr) { return OrtApis::CreateStatus(ORT_INVALID_ARGUMENT, "No requested allocator available"); } diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index a86dec575ff46..dacbe7ff3f71d 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -1397,8 +1397,8 @@ std::string InferenceSession::EndProfiling() { return std::string(); } -AllocatorPtr InferenceSession::GetAllocator(const OrtMemoryInfo* mem_info) const { - return session_state_->GetAllocator(*mem_info); +AllocatorPtr InferenceSession::GetAllocator(const OrtMemoryInfo& mem_info) const { + return session_state_->GetAllocator(mem_info); } // assumes model has already been loaded before diff --git a/onnxruntime/core/session/inference_session.h b/onnxruntime/core/session/inference_session.h index 
2fe667f771760..b32fd62dc7fac 100644 --- a/onnxruntime/core/session/inference_session.h +++ b/onnxruntime/core/session/inference_session.h @@ -356,10 +356,10 @@ class InferenceSession { /** * Search registered execution providers for an allocator that has characteristics * specified within mem_info - * @param mem_info is a pointer to OrtMemoryInfo that contains requires specs + * @param mem_info is a reference to OrtMemoryInfo that contains required specs * @return a ptr to the allocator or nullptr if not available */ - AllocatorPtr GetAllocator(const OrtMemoryInfo* mem_info) const; + AllocatorPtr GetAllocator(const OrtMemoryInfo& mem_info) const; /** *Get InferenceSession logger. diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index dbf25a9a6b4a4..c8ed881ef777f 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -679,11 +679,11 @@ ORT_API_STATUS_IMPL(OrtApis::GetBoundOutputValues, _In_ const OrtIoBinding* bind } -void OrtApis::ClearBoundInputs(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION { +ORT_API(void, OrtApis::ClearBoundInputs, _Inout_ OrtIoBinding* binding_ptr) { binding_ptr->binding_->ClearInputs(); } -void OrtApis::ClearBoundOutputs(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION { +ORT_API(void, OrtApis::ClearBoundOutputs, _Inout_ OrtIoBinding* binding_ptr) { binding_ptr->binding_->ClearOutputs(); } diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index bf41042b5616a..2f7cf49347fb1 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -449,7 +449,7 @@ TEST(CApiTest, io_binding) { session.Run(Ort::RunOptions(), binding); // Check the values against the bound raw memory - ASSERT_TRUE(std::equal(std::cbegin(y_values), std::cend(y_values), std::cbegin(expected_y))); + ASSERT_TRUE(std::equal(std::begin(y_values), std::end(y_values), std::begin(expected_y))); // Now compare values via GetOutputValues { @@ -462,7 +462,7 @@ TEST(CApiTest, io_binding) { auto count = type_info.GetElementCount(); ASSERT_EQ(expected_y.size(), count); const float* values = Y_value.GetTensorData(); - ASSERT_TRUE(std::equal(values, values + count, std::cbegin(expected_y))); + ASSERT_TRUE(std::equal(values, values + count, std::begin(expected_y))); } { @@ -490,7 +490,7 @@ TEST(CApiTest, io_binding) { auto count = type_info.GetElementCount(); ASSERT_EQ(expected_y.size(), count); const float* values = Y_value.GetTensorData(); - ASSERT_TRUE(std::equal(values, values + count, std::cbegin(expected_y))); + ASSERT_TRUE(std::equal(values, values + count, std::begin(expected_y))); } binding.ClearBoundInputs(); From dddd3683414d5169d464913ac9637c67dfeae200 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 29 Jul 2020 11:54:28 -0700 Subject: [PATCH 25/39] Advance ptr when creating output values. 
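GetBoundOutputValues never advanced its output cursor, so every
OrtValue* it produced was written into slot 0 of the preallocated array
and the remaining slots were left uninitialized. The corrected loop,
abridged from the change below for context:

    OrtValue** out_ptr = ortvalues_alloc.get();
    for (const auto& out_value : outputs) {
      *out_ptr = new OrtValue(out_value);
      ++out_ptr;  // previously missing: move to the next slot
      ++created;
    }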
--- onnxruntime/core/session/onnxruntime_c_api.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index c8ed881ef777f..4c2acbf4855ea 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -667,6 +667,7 @@ ORT_API_STATUS_IMPL(OrtApis::GetBoundOutputValues, _In_ const OrtIoBinding* bind OrtValue** out_ptr = ortvalues_alloc.get(); for (const auto& out_value : outputs) { *out_ptr = new OrtValue(out_value); + ++out_ptr; ++created; } From 38c3686135a136c59dabe51e35103b5fa104da4c Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 29 Jul 2020 16:26:10 -0700 Subject: [PATCH 26/39] Save work before Bitlocker restart. --- .../Tensors/Tensor.cs | 75 +++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/Tensor.cs b/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/Tensor.cs index 09d4634ab636e..afbe054dbc00c 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/Tensor.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/Tensor.cs @@ -17,6 +17,8 @@ using System.Text; using System; using System.Runtime.CompilerServices; +using System.Runtime.InteropServices; +using System.Reflection; // Making this assembly's internals visible to the internal Test assembly [assembly: InternalsVisibleTo("Microsoft.ML.OnnxRuntime.Tests," + @@ -53,6 +55,79 @@ public enum TensorElementType DataTypeMax = 17 } + [StructLayout(LayoutKind.Sequential)] + internal struct Float16 + { + public ushort Value { get; private set; } + public Float16(ushort val) + { + Value = val; + } + } + + [StructLayout(LayoutKind.Sequential)] + internal struct BFloat16 + { + public ushort Value { get; private set; } + public BFloat16(ushort val) + { + Value = val; + } + } + + /// + /// Helps typecasting. 
Holds primitive type information + /// + internal class TensorTypeInfo + { + public TensorElementType ElementType { get; private set; } + public int TypeSize { get; private set; } + public bool IsString { get { return ElementType == TensorElementType.String; } } + public TensorTypeInfo(TensorElementType elementType, int typeSize) + { + ElementType = elementType; + TypeSize = typeSize; + } + } + + internal class TensorBase + { + private static readonly Dictionary typeInfoMap = + new Dictionary() + { + { typeof(float), new TensorTypeInfo( TensorElementType.Float, sizeof(float)) }, + { typeof(byte), new TensorTypeInfo( TensorElementType.UInt8, sizeof(byte)) }, + { typeof(sbyte), new TensorTypeInfo( TensorElementType.Int8, sizeof(sbyte)) }, + { typeof(ushort), new TensorTypeInfo( TensorElementType.UInt16, sizeof(ushort)) }, + { typeof(short), new TensorTypeInfo( TensorElementType.Int16, sizeof(short)) }, + { typeof(int), new TensorTypeInfo( TensorElementType.Int32, sizeof(int)) }, + { typeof(long), new TensorTypeInfo( TensorElementType.Int64, sizeof(long)) }, + { typeof(string), new TensorTypeInfo( TensorElementType.String, -1) }, + { typeof(bool), new TensorTypeInfo( TensorElementType.Bool, sizeof(bool)) }, + { typeof(Float16), new TensorTypeInfo( TensorElementType.Float16, sizeof(ushort)) }, + { typeof(double), new TensorTypeInfo( TensorElementType.Double, sizeof(double)) }, + { typeof(uint), new TensorTypeInfo( TensorElementType.UInt32, sizeof(uint)) }, + { typeof(ulong), new TensorTypeInfo( TensorElementType.UInt64, sizeof(ulong)) }, + { typeof(BFloat16), new TensorTypeInfo( TensorElementType.BFloat16, sizeof(ushort)) } + }; + + private readonly Type _primitiveType; + protected TensorBase(Type primitiveType) + { + _primitiveType = primitiveType; + } + /// + /// Queries the map and returns the result or null + /// + /// + public TensorTypeInfo GetTypeInfo() + { + TensorTypeInfo result = null; + typeInfoMap.TryGetValue(_primitiveType, out result); + return result; + } + } + /// /// Various methods for creating and manipulating Tensor<T> /// From cb536fd3a625aa42a9fa9caf4d64d1c1dd076331 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 31 Jul 2020 16:45:57 -0700 Subject: [PATCH 27/39] Add necessary member functions to MemoryInfo. --- .../core/session/onnxruntime_cxx_api.h | 63 ++++++++++++++++--- .../core/session/onnxruntime_cxx_inline.h | 47 ++++++++++++-- onnxruntime/test/shared_lib/test_inference.cc | 15 +++-- 3 files changed, 104 insertions(+), 21 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 17ca8e4a8fac8..3f8b6fd8ce23a 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -83,6 +83,9 @@ ORT_DEFINE_RELEASE(IoBinding); // This is used internally by the C++ API. This is the common base class used by the wrapper objects. 
template struct Base { + + using contained_type = T; + Base() = default; Base(T* p) : p_{p} { if (!p) throw Ort::Exception("Allocation failure", ORT_FAIL); @@ -109,16 +112,38 @@ struct Base { } T* p_{}; - - template - friend struct Unowned; // This friend line is needed to keep the centos C++ compiler from giving an error }; template +struct Base { + + using contained_type = const T; + + Base() = default; + Base(const T* p) : p_{p} { + if (!p) throw Ort::Exception("Invalid instance ptr", ORT_INVALID_ARGUMENT); + } + ~Base() = default; + + operator const T*() const { return p_; } + + protected: + Base(const Base&) = delete; + Base& operator=(const Base&) = delete; + Base(Base&& v) noexcept : p_{v.p_} { v.p_ = nullptr; } + void operator=(Base&& v) noexcept { + p_ = v.p_; + v.p_ = nullptr; + } + + const T* p_{}; +}; + +template struct Unowned : T { Unowned(decltype(T::p_) p) : T{p} {} Unowned(Unowned&& v) : T{v.p_} {} - ~Unowned() { this->p_ = nullptr; } + ~Unowned() { this->release(); } }; struct AllocatorWithDefaultOptions; @@ -279,7 +304,7 @@ struct Value : Base { static Value CreateOpaque(const char* domain, const char* type_name, const T&); template - void GetOpaqueData(const char* domain, const char* type_name, T&); + void GetOpaqueData(const char* domain, const char* type_name, T&) const; explicit Value(std::nullptr_t) {} explicit Value(OrtValue* p) : Base{p} {} @@ -324,13 +349,33 @@ struct AllocatorWithDefaultOptions { OrtAllocator* p_{}; }; -struct MemoryInfo : Base { +template +struct BaseMemoryInfo : B { + BaseMemoryInfo() = default; + explicit BaseMemoryInfo(typename B::contained_type* p) : B(p) {} + ~BaseMemoryInfo() = default; + BaseMemoryInfo(BaseMemoryInfo&&) = default; + BaseMemoryInfo& operator=(BaseMemoryInfo&&) = default; + + std::string GetAllocatorName() const; + OrtAllocatorType GetAllocatorType() const; + int GetDeviceId() const; + OrtMemType GetMemoryType() const; + template + bool operator==(const BaseMemoryInfo& o) const; +}; + +struct UnownedMemoryInfo : BaseMemoryInfo > { + explicit UnownedMemoryInfo(std::nullptr_t) {} + explicit UnownedMemoryInfo(const OrtMemoryInfo* p) : BaseMemoryInfo(p) {} +}; + +struct MemoryInfo : BaseMemoryInfo > { static MemoryInfo CreateCpu(OrtAllocatorType type, OrtMemType mem_type1); explicit MemoryInfo(std::nullptr_t) {} + explicit MemoryInfo(OrtMemoryInfo* p) : BaseMemoryInfo(p) {} MemoryInfo(const char* name, OrtAllocatorType type, int id, OrtMemType mem_type); - - explicit MemoryInfo(OrtMemoryInfo* p) : Base{p} {} }; struct Allocator : public Base { @@ -338,7 +383,7 @@ struct Allocator : public Base { void* Alloc(size_t size) const; void Free(void* p) const; - const OrtMemoryInfo* GetInfo() const; + UnownedMemoryInfo GetInfo() const; }; struct IoBinding : public Base { diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index f7465007ee30d..a451c691d40dd 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -68,6 +68,42 @@ inline const OrtMemoryInfo* AllocatorWithDefaultOptions::GetInfo() const { return out; } +template +inline std::string BaseMemoryInfo::GetAllocatorName() const { + const char* name = nullptr; + ThrowOnError(GetApi().MemoryInfoGetName(p_, &name)); + return std::string(name); +} + +template +inline OrtAllocatorType BaseMemoryInfo::GetAllocatorType() const { + OrtAllocatorType type; + ThrowOnError(GetApi().MemoryInfoGetType(p_, &type)); + return type; +} + 
+template +int BaseMemoryInfo::GetDeviceId() const { + int id = 0; + ThrowOnError(GetApi().MemoryInfoGetId(p_, &id)); + return id; +} + +template +inline OrtMemType BaseMemoryInfo::GetMemoryType() const { + OrtMemType type; + ThrowOnError(GetApi().MemoryInfoGetMemType(p_, &type)); + return type; +} + +template +template +inline bool BaseMemoryInfo::operator==(const BaseMemoryInfo& o) const { + int comp_result = 0; + ThrowOnError(Ort::GetApi().CompareMemoryInfo(p_, o, &comp_result)); + return comp_result == 0; +} + inline MemoryInfo MemoryInfo::CreateCpu(OrtAllocatorType type, OrtMemType mem_type) { OrtMemoryInfo* p; ThrowOnError(GetApi().CreateCpuMemoryInfo(type, mem_type, &p)); @@ -79,8 +115,7 @@ inline MemoryInfo::MemoryInfo(const char* name, OrtAllocatorType type, int id, O } inline Allocator::Allocator(const Session& sess, const MemoryInfo& mem_info) { - ThrowOnError(GetApi().CreateAllocator(sess.operator const OrtSession*(), - mem_info.operator const OrtMemoryInfo*(), &p_)); + ThrowOnError(GetApi().CreateAllocator(sess, mem_info, &p_)); } inline void* Allocator::Alloc(size_t size) const { @@ -93,10 +128,10 @@ inline void Allocator::Free(void* p) const { ThrowOnError(GetApi().AllocatorFree(p_, p)); } -inline const OrtMemoryInfo* Allocator::GetInfo() const { +inline UnownedMemoryInfo Allocator::GetInfo() const { const OrtMemoryInfo* out = nullptr; ThrowOnError(GetApi().AllocatorGetInfo(p_, &out)); - return out; + return UnownedMemoryInfo(out); } inline IoBinding::IoBinding(Session& session) { @@ -524,7 +559,7 @@ inline std::vector TensorTypeAndShapeInfo::GetShape() const { inline Unowned TypeInfo::GetTensorTypeAndShapeInfo() const { const OrtTensorTypeAndShapeInfo* out; ThrowOnError(GetApi().CastTypeInfoToTensorInfo(p_, &out)); - return Unowned{const_cast(out)}; + return Unowned(const_cast(out)); } inline ONNXType TypeInfo::GetONNXType() const { @@ -578,7 +613,7 @@ inline Value Value::CreateOpaque(const char* domain, const char* type_name, cons } template -inline void Value::GetOpaqueData(const char* domain, const char* type_name, T& out) { +inline void Value::GetOpaqueData(const char* domain, const char* type_name, T& out) const { ThrowOnError(GetApi().GetOpaqueValue(domain, type_name, p_, &out, sizeof(T))); } diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 2f7cf49347fb1..16cabc10f0443 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -398,9 +398,13 @@ TEST(CApiTest, get_allocator_cpu) { Ort::MemoryInfo info_cpu = Ort::MemoryInfo::CreateCpu(OrtAllocatorType::OrtArenaAllocator, OrtMemTypeDefault); Ort::Allocator cpu_allocator(session, info_cpu); - int com_result = 0; - Ort::ThrowOnError(Ort::GetApi().CompareMemoryInfo(info_cpu, cpu_allocator.GetInfo(), &com_result)); - ASSERT_EQ(com_result, 0); + // CPU OrtMemoryInfo does not return OrtArenaAllocator on x86 but rather a device allocator, + // which causes the MemoryInfo that is used to request the allocator and the actual instance + // of MemoryInfo returned from the allocator to not match exactly, although they are functionally equivalent. 
+ auto allocator_info = cpu_allocator.GetInfo(); + ASSERT_EQ(info_cpu.GetAllocatorName(), allocator_info.GetAllocatorName()); + ASSERT_EQ(info_cpu.GetDeviceId(), allocator_info.GetDeviceId()); + ASSERT_EQ(info_cpu.GetMemoryType(), allocator_info.GetMemoryType()); void* p = cpu_allocator.Alloc(1024); ASSERT_NE(p, nullptr); cpu_allocator.Free(p); @@ -415,9 +419,8 @@ TEST(CApiTest, get_allocator_cuda) { Ort::MemoryInfo info_cuda("Cuda", OrtAllocatorType::OrtArenaAllocator, 0, OrtMemTypeDefault); Ort::Allocator cuda_allocator(session, info_cuda); - int com_result = 0; - Ort::ThrowOnError(Ort::GetApi().CompareMemoryInfo(info_cuda, cuda_allocator.GetInfo(), &com_result)); - ASSERT_EQ(com_result, 0); + auto allocator_info = cuda_allocator.GetInfo(); + ASSERT_TRUE(info_cuda == allocator_info); void* p = cuda_allocator.Alloc(1024); ASSERT_NE(p, nullptr); cuda_allocator.Free(p); From 9a1c2cdd50e1286bef9f04946a3b44f17568adbc Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 31 Jul 2020 17:05:14 -0700 Subject: [PATCH 28/39] Address review comments. --- .../Microsoft.ML.OnnxRuntime/OrtAllocator.cs | 22 ++++++++++++++++++- .../Microsoft.ML.OnnxRuntime/OrtIoBinding.cs | 10 ++++----- .../InferenceTest.cs | 20 ++++++++++------- 3 files changed, 38 insertions(+), 14 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs index 1e6fade95400b..f82f84e27e93d 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs @@ -174,12 +174,32 @@ public OrtAllocatorType GetAllocatorType() return allocatorType; } - public bool CompareMemoryInfo(OrtMemoryInfo other) + public override bool Equals(object obj) { + var other = obj as OrtMemoryInfo; + if(other == null) + { + return false; + } + return Equals(other); + } + + public bool Equals(OrtMemoryInfo other) + { + if(this == other) + { + return true; + } int result = -1; NativeApiStatus.VerifySuccess(NativeMethods.OrtCompareMemoryInfo(_pointer, other._pointer, out result)); return (result == 0); } + + public override int GetHashCode() + { + return Pointer.GetHashCode(); + } + #region IDisposable Support protected virtual void Dispose(bool disposing) { diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs index a421fbc84d8f6..f6fa65e9790cc 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs @@ -52,7 +52,7 @@ public void BindInput(string name, Tensors.TensorElementType elementType, long[] elementType, shape, allocation.Pointer, allocation.Size)) - BindIntputOrOutput(name, ortValue.Handle, true); + BindInputOrOutput(name, ortValue.Handle, true); } /// @@ -67,7 +67,7 @@ public void BindInput(string name, FixedBufferOnnxValue fixedValue) { throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Binding works only with Tensors"); } - BindIntputOrOutput(name, fixedValue.Value, true); + BindInputOrOutput(name, fixedValue.Value, true); } /// @@ -85,7 +85,7 @@ public void BindOutput(string name, Tensors.TensorElementType elementType, long[ elementType, shape, allocation.Pointer, allocation.Size)) - BindIntputOrOutput(name, ortValue.Handle, false); + BindInputOrOutput(name, ortValue.Handle, false); } /// @@ -100,7 +100,7 @@ public void BindOutput(string name, FixedBufferOnnxValue fixedValue) { throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Binding works only with Tensors"); } - 
BindIntputOrOutput(name, fixedValue.Value, false); + BindInputOrOutput(name, fixedValue.Value, false); } /// @@ -122,7 +122,7 @@ public void BindOutputToDevice(string name, OrtMemoryInfo memInfo) /// /// /// - private void BindIntputOrOutput(string name, IntPtr ortValue, bool isInput) + private void BindInputOrOutput(string name, IntPtr ortValue, bool isInput) { var utf8NamePinned = GCHandle.Alloc(NativeOnnxValueHelper.StringToZeroTerminatedUtf8(name), GCHandleType.Pinned); using (var pinnedName = new PinnedGCHandle(utf8NamePinned)) diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index aac35661c107a..ca5eaf7502bfc 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -1657,7 +1657,7 @@ private void TestInferenceSessionWithByteArray() } } - void TestCPUAllocator(InferenceSession session) + void TestCPUAllocatorInternal(InferenceSession session) { int device_id = 0; using (var info_cpu = new OrtMemoryInfo(OrtMemoryInfo.allocatorCPU, OrtAllocatorType.ArenaAllocator, device_id, OrtMemType.Default)) @@ -1672,13 +1672,17 @@ void TestCPUAllocator(InferenceSession session) { Assert.NotEqual(allocator.Pointer, IntPtr.Zero); var alloc_info = allocator.Info; - Assert.True(info_cpu.CompareMemoryInfo(alloc_info)); + Assert.True(info_cpu.Equals(alloc_info)); uint size = 1024; OrtMemoryAllocation chunk = allocator.Allocate(size); Assert.NotEqual(chunk.Pointer, IntPtr.Zero); Assert.Equal(chunk.Size, size); - Assert.True(chunk.Info.CompareMemoryInfo(alloc_info)); + var chunk_info = chunk.Info; + // Allocator type returned may be different on x86 so we don't compare. + Assert.Equal(chunk_info.Name, alloc_info.Name); + Assert.Equal(chunk_info.GetMemoryType(), alloc_info.GetMemoryType()); + Assert.Equal(chunk_info.Id, alloc_info.Id); chunk.Dispose(); alloc_info.Dispose(); } @@ -1686,7 +1690,7 @@ void TestCPUAllocator(InferenceSession session) } #if USE_CUDA - void TestCUDAAllocator(InferenceSession session) + void TestCUDAAllocatorInternal(InferenceSession session) { int device_id = 0; using (var info_cuda = new OrtMemoryInfo(OrtMemoryInfo.allocatorCUDA, OrtAllocatorType.ArenaAllocator, device_id, OrtMemType.Default)) @@ -1701,13 +1705,13 @@ void TestCUDAAllocator(InferenceSession session) { Assert.NotEqual(allocator.Pointer, IntPtr.Zero); var alloc_info = allocator.Info; - Assert.True(info_cuda.CompareMemoryInfo(alloc_info)); + Assert.True(info_cuda.Equals(alloc_info)); uint size = 1024; OrtMemoryAllocation chunk = allocator.Allocate(size); Assert.NotEqual(chunk.Pointer, IntPtr.Zero); Assert.Equal(chunk.Size, size); - Assert.True(chunk.Info.CompareMemoryInfo(alloc_info)); + Assert.True(chunk.Info.Equals(alloc_info)); chunk.Dispose(); alloc_info.Dispose(); } @@ -1727,9 +1731,9 @@ private void TestAllocator() #endif using (var session = new InferenceSession(modelPath, options)) { - TestCPUAllocator(session); + TestCPUAllocatorInternal(session); #if USE_CUDA - TestCUDAAllocator(session); + TestCUDAAllocatorInternal(session); #endif } } From 48aeccad259954f6af653e34ab3039ac7b3dfbef Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Fri, 31 Jul 2020 18:08:57 -0700 Subject: [PATCH 29/39] Fix centos compilation. 
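The CentOS CI compiler (an older GCC) enforces two-phase name lookup:
the unqualified p_ that BaseMemoryInfo inherits from its dependent base
is not found at template definition time, and Unowned needs the friend
declaration to reach Base's protected pointer. Re-add the friend line
and route the accessor bodies through *this, which goes via the base's
conversion operator instead of naming p_. A minimal self-contained
illustration of the lookup rule (names here are illustrative, not from
the codebase):

    template <typename T>
    struct Holder {
      T* p_{};
      operator T*() const { return p_; }
    };

    template <typename B>
    struct Wrapper : B {
      static void Take(int*) {}
      void Use() {
        // Take(p_);     // rejected by conforming compilers: p_ lives in the dependent base B
        Take(this->p_);  // OK: the name is made explicitly dependent
        Take(*this);     // OK: the route taken here, via B's conversion operator
      }
    };

    int main() {
      Wrapper<Holder<int>> w;
      w.Use();
      return 0;
    }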
--- include/onnxruntime/core/session/onnxruntime_cxx_api.h | 3 +++ .../onnxruntime/core/session/onnxruntime_cxx_inline.h | 10 +++++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index 3f8b6fd8ce23a..f87d4db9a153e 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -112,6 +112,9 @@ struct Base { } T* p_{}; + + template + friend struct Unowned; // This friend line is needed to keep the centos C++ compiler from giving an error }; template diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index a451c691d40dd..1bfd0366103aa 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -71,28 +71,28 @@ inline const OrtMemoryInfo* AllocatorWithDefaultOptions::GetInfo() const { template inline std::string BaseMemoryInfo::GetAllocatorName() const { const char* name = nullptr; - ThrowOnError(GetApi().MemoryInfoGetName(p_, &name)); + ThrowOnError(GetApi().MemoryInfoGetName(*this, &name)); return std::string(name); } template inline OrtAllocatorType BaseMemoryInfo::GetAllocatorType() const { OrtAllocatorType type; - ThrowOnError(GetApi().MemoryInfoGetType(p_, &type)); + ThrowOnError(GetApi().MemoryInfoGetType(*this, &type)); return type; } template int BaseMemoryInfo::GetDeviceId() const { int id = 0; - ThrowOnError(GetApi().MemoryInfoGetId(p_, &id)); + ThrowOnError(GetApi().MemoryInfoGetId(*this, &id)); return id; } template inline OrtMemType BaseMemoryInfo::GetMemoryType() const { OrtMemType type; - ThrowOnError(GetApi().MemoryInfoGetMemType(p_, &type)); + ThrowOnError(GetApi().MemoryInfoGetMemType(*this, &type)); return type; } @@ -100,7 +100,7 @@ template template inline bool BaseMemoryInfo::operator==(const BaseMemoryInfo& o) const { int comp_result = 0; - ThrowOnError(Ort::GetApi().CompareMemoryInfo(p_, o, &comp_result)); + ThrowOnError(Ort::GetApi().CompareMemoryInfo(*this, o, &comp_result)); return comp_result == 0; } From b4c5d23093936d745748622e95b04685dd2aecf1 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 3 Aug 2020 12:09:59 -0700 Subject: [PATCH 30/39] Fix up info comparison for CPU. --- csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index ca5eaf7502bfc..215432395812b 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -1672,7 +1672,10 @@ void TestCPUAllocatorInternal(InferenceSession session) { Assert.NotEqual(allocator.Pointer, IntPtr.Zero); var alloc_info = allocator.Info; - Assert.True(info_cpu.Equals(alloc_info)); + // Allocator type returned may be different on x86 so we don't compare. 
+ Assert.Equal(info_cpu.Name, alloc_info.Name); + Assert.Equal(info_cpu.GetMemoryType(), alloc_info.GetMemoryType()); + Assert.Equal(info_cpu.Id, alloc_info.Id); uint size = 1024; OrtMemoryAllocation chunk = allocator.Allocate(size); From 53dd8607cd9832fee719c0879102d7f466a3414e Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 3 Aug 2020 15:25:54 -0700 Subject: [PATCH 31/39] Move factory methods to OrtValue --- .../FixedBufferOnnxValue.cs | 33 +- .../InferenceSession.cs | 10 +- .../NamedOnnxValue.cs | 6 +- .../NativeOnnxTensorMemory.cs | 6 +- .../NativeOnnxValueHelper.cs | 347 +----------------- .../Microsoft.ML.OnnxRuntime/OrtIoBinding.cs | 4 +- .../src/Microsoft.ML.OnnxRuntime/OrtValue.cs | 234 ++++++++++++ .../Tensors/Tensor.cs | 12 +- .../core/session/onnxruntime_cxx_api.h | 23 ++ .../core/session/onnxruntime_cxx_inline.h | 50 +++ onnxruntime/test/shared_lib/test_inference.cc | 8 + 11 files changed, 346 insertions(+), 387 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs index 3745e8042633b..be37207e0e517 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs @@ -11,14 +11,14 @@ namespace Microsoft.ML.OnnxRuntime public class FixedBufferOnnxValue : IDisposable { internal MemoryHandle PinnedMemory { get; private set; } - internal IntPtr Value { get; private set; } + internal OrtValue Value { get; private set; } internal OnnxValueType OnnxValueType { get; private set; } internal TensorElementType ElementType { get; private set; } - private FixedBufferOnnxValue(MemoryHandle pinnedMemory, IntPtr onnxValue, OnnxValueType onnxValueType, TensorElementType elementType) + private FixedBufferOnnxValue(MemoryHandle pinnedMemory, OrtValue ortValue, OnnxValueType onnxValueType, TensorElementType elementType) { PinnedMemory = pinnedMemory; - Value = onnxValue; + Value = ortValue; OnnxValueType = onnxValueType; ElementType = elementType; } @@ -31,35 +31,18 @@ private FixedBufferOnnxValue(MemoryHandle pinnedMemory, IntPtr onnxValue, OnnxVa /// public static FixedBufferOnnxValue CreateFromTensor(Tensor value) { - NativeOnnxValueHelper.CreateNativeOnnxValue(value, out IntPtr onnxValue, out MemoryHandle pinnedMemoryHandle, out OnnxValueType onnxValueType, out TensorElementType elementType); - - Debug.Assert(onnxValueType == OnnxValueType.ONNX_TYPE_TENSOR, "the value should always be a tensor"); - - return new FixedBufferOnnxValue(pinnedMemoryHandle, onnxValue, onnxValueType, elementType); + var ortValue = OrtValue.CreateFromTensorObject(value, out MemoryHandle pinnedMemoryHandle, out TensorElementType elementType); + return new FixedBufferOnnxValue(pinnedMemoryHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType); } #region IDisposable Support - // standard dispose pattern to deal with both managed and native resources - - private bool disposed = false; - protected virtual void Dispose(bool disposing) { - if (!disposed) + if (disposing) { - if (disposing) - { - ((IDisposable)PinnedMemory).Dispose(); - } - - if (Value != IntPtr.Zero) - { - NativeMethods.OrtReleaseValue(Value); - Value = IntPtr.Zero; - } - - disposed = true; + Value.Dispose(); + PinnedMemory.Dispose(); } } diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs index 82dc52e3a16d4..9387d2735deec 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs +++ 
b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs @@ -257,7 +257,7 @@ public IDisposableReadOnlyCollection Run( int inputIndex = 0; foreach (var input in inputValues) { - inputValuesArray[inputIndex] = input.Value; + inputValuesArray[inputIndex] = input.Value.Handle; inputIndex++; } @@ -353,7 +353,7 @@ public void Run( int inputIndex = 0; foreach (var input in inputValues) { - inputValuesArray[inputIndex] = input.Value; + inputValuesArray[inputIndex] = input.Value.Handle; inputIndex++; } @@ -369,7 +369,7 @@ public void Run( throw new NotSupportedException("Using string type FixedBufferOnnxValue in outputs is not supported."); } - outputValuesArray[outputIndex] = output.Value; + outputValuesArray[outputIndex] = output.Value.Handle; outputIndex++; } @@ -564,7 +564,7 @@ public void Run( throw new NotSupportedException("Using string type FixedBufferOnnxValue in outputs is not supported."); } - outputValuesArray[outputIndex] = output.Value; + outputValuesArray[outputIndex] = output.Value.Handle; outputIndex++; } @@ -646,7 +646,7 @@ public void Run( int inputIndex = 0; foreach (var input in inputValues) { - inputValuesArray[inputIndex] = input.Value; + inputValuesArray[inputIndex] = input.Value.Handle; inputIndex++; } diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NamedOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NamedOnnxValue.cs index 4bc269e1c8573..029dcdcb03f00 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NamedOnnxValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NamedOnnxValue.cs @@ -5,6 +5,7 @@ using System; using System.Buffers; using System.Collections.Generic; +using System.Runtime.InteropServices.ComTypes; namespace Microsoft.ML.OnnxRuntime { @@ -70,7 +71,10 @@ internal virtual void ToNativeOnnxValue( out MemoryHandle pinnedMemoryHandle, out bool disposeOnnxValueAfterUse) { - NativeOnnxValueHelper.CreateNativeOnnxValue(_value, out onnxValue, out pinnedMemoryHandle, out OnnxValueType onnxValueType, out TensorElementType elementType); + var ortValue = OrtValue.CreateFromTensorObject(_value, out pinnedMemoryHandle, out TensorElementType elementType); + onnxValue = ortValue.Disown(); + // Dispose any other parts if any + ortValue.Dispose(); disposeOnnxValueAfterUse = true; } diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs index 91dcd6a2aed5f..4c1958485ffbc 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxTensorMemory.cs @@ -10,6 +10,7 @@ namespace Microsoft.ML.OnnxRuntime { /// + /// TODO: dmitrism -> Get rid of this class. 
/// A non-public interface detailing the contract to be honored by NativeOnnxTensorMemory /// internal interface NativeMemoryHandler : IDisposable @@ -118,11 +119,6 @@ public NativeOnnxTensorMemory(IntPtr onnxValueHandle) public IntPtr Handle { get { return _onnxValueHandle; } } - ~NativeOnnxTensorMemory() - { - Dispose(false); - } - public void Dispose() { GC.SuppressFinalize(this); diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs index 5bbb12b196aab..c7ba3b3d5290f 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs @@ -3,8 +3,6 @@ using Microsoft.ML.OnnxRuntime.Tensors; using System; -using System.Buffers; -using System.Diagnostics; using System.Runtime.InteropServices; using System.Text; @@ -45,338 +43,11 @@ public void Dispose() #endregion } -/// -/// This helper class contains methods to create native OrtValue from a managed value object -/// -internal static class NativeOnnxValueHelper + /// + /// This helper class contains methods to create native OrtValue from a managed value object + /// + internal static class NativeOnnxValueHelper { - /// - /// Attempts to Pin the buffer, and create a native OnnxValue out of it. the pinned MemoryHandle is passed to output. - /// In this case, the pinnedHandle should be kept alive till the native OnnxValue is used, then dispose it. - /// If it is not possible to Pin the buffer, then creates OnnxValue from the copy of the data. The output pinnedMemoryHandle - /// contains a default value in that case. - /// Attempts to infer the type of the value while creating the OnnxValue - /// - /// - /// - /// - /// - internal static void CreateNativeOnnxValue(Object value, out IntPtr onnxValue, out MemoryHandle pinnedMemoryHandle, out OnnxValueType onnxValueType, out TensorElementType elementType) - { - //try to cast _value to Tensor - elementType = TensorElementType.DataTypeMax; //invalid - IntPtr dataBufferPointer = IntPtr.Zero; - int dataBufferLength = 0; - ReadOnlySpan shape = null; - int rank = 0; - onnxValue = IntPtr.Zero; - - if (!(value is Tensor)) - { - if (TryPinAsTensor(value, out pinnedMemoryHandle, - out dataBufferPointer, - out dataBufferLength, - out shape, - out rank, - out elementType)) - { - } - else if (TryPinAsTensor(value, out pinnedMemoryHandle, - out dataBufferPointer, - out dataBufferLength, - out shape, - out rank, - out elementType)) - { - } - else if (TryPinAsTensor(value, out pinnedMemoryHandle, - out dataBufferPointer, - out dataBufferLength, - out shape, - out rank, - out elementType)) - { - } - else if (TryPinAsTensor(value, out pinnedMemoryHandle, - out dataBufferPointer, - out dataBufferLength, - out shape, - out rank, - out elementType)) - { - } - else if (TryPinAsTensor(value, out pinnedMemoryHandle, - out dataBufferPointer, - out dataBufferLength, - out shape, - out rank, - out elementType)) - { - } - else if (TryPinAsTensor(value, out pinnedMemoryHandle, - out dataBufferPointer, - out dataBufferLength, - out shape, - out rank, - out elementType)) - { - } - else if (TryPinAsTensor(value, out pinnedMemoryHandle, - out dataBufferPointer, - out dataBufferLength, - out shape, - out rank, - out elementType)) - { - } - else if (TryPinAsTensor(value, out pinnedMemoryHandle, - out dataBufferPointer, - out dataBufferLength, - out shape, - out rank, - out elementType)) - { - } - else if (TryPinAsTensor(value, out pinnedMemoryHandle, - out dataBufferPointer, - 
out dataBufferLength, - out shape, - out rank, - out elementType)) - { - } - else if (TryPinAsTensor(value, out pinnedMemoryHandle, - out dataBufferPointer, - out dataBufferLength, - out shape, - out rank, - out elementType)) - { - } - else if (TryPinAsTensor(value, out pinnedMemoryHandle, - out dataBufferPointer, - out dataBufferLength, - out shape, - out rank, - out elementType)) - { - } - //TODO: add other types - else - { - // nothing to cleanup here, since no memory has been pinned - throw new NotSupportedException("The inference value " + nameof(value) + " is not of a supported type"); - } - - try - { - Debug.Assert(dataBufferPointer != IntPtr.Zero, "dataBufferPointer must be non-null after obtaining the pinned buffer"); - - onnxValueType = OnnxValueType.ONNX_TYPE_TENSOR; // set onnx value type to tensor - - // copy to an ulong[] shape to match int64_t[] - long[] longShape = new long[rank]; - for (int i = 0; i < rank; i++) - { - longShape[i] = shape[i]; - } - - NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateTensorWithDataAsOrtValue( - OrtMemoryInfo.DefaultInstance.Pointer, - dataBufferPointer, - (UIntPtr)(dataBufferLength), - longShape, - (UIntPtr)rank, - elementType, - out onnxValue)); - } - catch (OnnxRuntimeException e) - { - pinnedMemoryHandle.Dispose(); - throw e; - } - } - // special case for string Tensor, data needs to be copied to the native buffer - else - { - // calculate native tensor length (sum of string lengths in utf-8) - var tensorValue = value as Tensor; - int totalLength = 0; - for (int i = 0; i < tensorValue.Length; i++) - { - totalLength += Encoding.UTF8.GetByteCount(tensorValue.GetValue(i)); - } - - long[] longShape = new long[tensorValue.Dimensions.Length]; - for (int i = 0; i < tensorValue.Dimensions.Length; i++) - { - longShape[i] = tensorValue.Dimensions[i]; - } - - // allocate the native tensor - IntPtr nativeTensor = IntPtr.Zero; - try - { - NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateTensorAsOrtValue( - OrtAllocator.DefaultInstance.Pointer, - longShape, - (UIntPtr)(longShape.Length), - TensorElementType.String, - out nativeTensor - )); - - // fill the native tensor, using GetValue(index) from the Tensor - var len = tensorValue.Length; - var stringsInTensor = new IntPtr[len]; - using(var pinnedHandles = new DisposableList((int)len)) - { - for (int i = 0; i < len; i++) - { - var utf8str = StringToZeroTerminatedUtf8(tensorValue.GetValue(i)); - var gcHandle = GCHandle.Alloc(utf8str, GCHandleType.Pinned); - stringsInTensor[i] = gcHandle.AddrOfPinnedObject(); - pinnedHandles.Add(new PinnedGCHandle(gcHandle)); - } - - using (var pinnedStrings = new PinnedGCHandle(GCHandle.Alloc(stringsInTensor, GCHandleType.Pinned))) - NativeApiStatus.VerifySuccess(NativeMethods.OrtFillStringTensor(nativeTensor, stringsInTensor, (UIntPtr)len)); - } - } - catch (OnnxRuntimeException e) - { - if (nativeTensor != IntPtr.Zero) - { - NativeMethods.OrtReleaseValue(nativeTensor); - throw e; - } - } - - onnxValue = nativeTensor; // set the output - pinnedMemoryHandle = default; // dummy value for the output - - onnxValueType = OnnxValueType.ONNX_TYPE_TENSOR; // set onnx value type to tensor - elementType = TensorElementType.String; // set tensor element type to string - } - } - - private static bool TryPinAsTensor( - Object value, - out MemoryHandle pinnedMemoryHandle, - out IntPtr dataBufferPointer, - out int dataBufferLength, - out ReadOnlySpan shape, - out int rank, - out TensorElementType nativeElementType) - { - nativeElementType = TensorElementType.DataTypeMax; 
//invalid - dataBufferPointer = IntPtr.Zero; - dataBufferLength = 0; - shape = null; - rank = 0; - pinnedMemoryHandle = default; - - Debug.Assert(typeof(T) != typeof(string), "NativeOnnxValueHelper.TryPinAsTensor() must not be called with a string Tensor value"); - - if (value is Tensor) - { - Tensor t = value as Tensor; - if (t.IsReversedStride) - { - //TODO: not sure how to support reverse stride. may be able to calculate the shape differently - throw new NotSupportedException(nameof(Tensor) + " of reverseStride is not supported"); - } - - DenseTensor dt = null; - if (value is DenseTensor) - { - dt = value as DenseTensor; - } - else - { - dt = t.ToDenseTensor(); - } - - shape = dt.Dimensions; // does not work for reverse stride - rank = dt.Rank; - pinnedMemoryHandle = dt.Buffer.Pin(); - unsafe - { - dataBufferPointer = (IntPtr)pinnedMemoryHandle.Pointer; - } - - // find the native type - if (typeof(T) == typeof(float)) - { - nativeElementType = TensorElementType.Float; - dataBufferLength = dt.Buffer.Length * sizeof(float); - } - else if (typeof(T) == typeof(double)) - { - nativeElementType = TensorElementType.Double; - dataBufferLength = dt.Buffer.Length * sizeof(double); - } - else if (typeof(T) == typeof(int)) - { - nativeElementType = TensorElementType.Int32; - dataBufferLength = dt.Buffer.Length * sizeof(int); - } - else if (typeof(T) == typeof(uint)) - { - nativeElementType = TensorElementType.UInt32; - dataBufferLength = dt.Buffer.Length * sizeof(uint); - } - else if (typeof(T) == typeof(long)) - { - nativeElementType = TensorElementType.Int64; - dataBufferLength = dt.Buffer.Length * sizeof(long); - } - else if (typeof(T) == typeof(ulong)) - { - nativeElementType = TensorElementType.UInt64; - dataBufferLength = dt.Buffer.Length * sizeof(ulong); - } - else if (typeof(T) == typeof(short)) - { - nativeElementType = TensorElementType.Int16; - dataBufferLength = dt.Buffer.Length * sizeof(short); - } - else if (typeof(T) == typeof(ushort)) - { - nativeElementType = TensorElementType.UInt16; - dataBufferLength = dt.Buffer.Length * sizeof(ushort); - } - else if (typeof(T) == typeof(byte)) - { - nativeElementType = TensorElementType.UInt8; - dataBufferLength = dt.Buffer.Length * sizeof(byte); - } - else if (typeof(T) == typeof(sbyte)) - { - nativeElementType = TensorElementType.Int8; - dataBufferLength = dt.Buffer.Length * sizeof(sbyte); - } - else if (typeof(T) == typeof(string)) - { - nativeElementType = TensorElementType.String; - dataBufferLength = dt.Buffer.Length * IntPtr.Size; - } - else if (typeof(T) == typeof(bool)) - { - nativeElementType = TensorElementType.Bool; - dataBufferLength = dt.Buffer.Length * sizeof(bool); // Assumes sizeof(BOOL) is always 1 byte in native - } - else - { - //TODO: may extend the supported types - // do not throw exception, rather assign the sentinel value - nativeElementType = TensorElementType.DataTypeMax; - } - return true; - } - - return false; - } - /// /// Converts C# UTF-16 string to UTF-8 zero terminated /// byte[] instance @@ -410,16 +81,6 @@ internal static string StringFromNativeUtf8(IntPtr nativeUtf8, int offset, int } } - public enum OnnxValueType - { - ONNX_TYPE_UNKNOWN = 0, - ONNX_TYPE_TENSOR = 1, - ONNX_TYPE_SEQUENCE = 2, - ONNX_TYPE_MAP = 3, - ONNX_TYPE_OPAQUE = 4, - ONNX_TYPE_SPARSETENSOR = 5, - } - internal static class TensorElementTypeConverter { public static void GetTypeAndWidth(TensorElementType elemType, out Type type, out int width) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs 
b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs index f6fa65e9790cc..940057434f7a8 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs @@ -67,7 +67,7 @@ public void BindInput(string name, FixedBufferOnnxValue fixedValue) { throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Binding works only with Tensors"); } - BindInputOrOutput(name, fixedValue.Value, true); + BindInputOrOutput(name, fixedValue.Value.Handle, true); } /// @@ -100,7 +100,7 @@ public void BindOutput(string name, FixedBufferOnnxValue fixedValue) { throw new OnnxRuntimeException(ErrorCode.InvalidArgument, "Binding works only with Tensors"); } - BindInputOrOutput(name, fixedValue.Value, false); + BindInputOrOutput(name, fixedValue.Value.Handle, false); } /// diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs index 396fff9c1da87..5460f62c24860 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs @@ -3,9 +3,21 @@ using Microsoft.ML.OnnxRuntime.Tensors; using System; +using System.Buffers; +using System.Runtime.InteropServices; namespace Microsoft.ML.OnnxRuntime { + public enum OnnxValueType + { + ONNX_TYPE_UNKNOWN = 0, + ONNX_TYPE_TENSOR = 1, + ONNX_TYPE_SEQUENCE = 2, + ONNX_TYPE_MAP = 3, + ONNX_TYPE_OPAQUE = 4, + ONNX_TYPE_SPARSETENSOR = 5, + } + /// /// Represents a disposable OrtValue /// @@ -24,6 +36,8 @@ internal OrtValue(IntPtr handle) /// /// This internal interface is used to transfer ownership elsewhere. + /// This instance must still be disposed in case there are other native + /// objects still owned. /// /// internal IntPtr Disown() @@ -80,6 +94,226 @@ out ortValueHandle return new OrtValue(ortValueHandle); } + /// + /// This is a factory method that ta + /// + /// Tensor object + /// For all tensor types but string tensors we endevour to use managed memory + /// to avoid additional allocation and copy. 
This out parameter represents a chunk of pinned memory + /// + /// discovered tensor element type + /// + public static OrtValue CreateFromTensorObject(Object value, out MemoryHandle memoryHandle, + out TensorElementType elementType) + { + // Check if this is a Tensor + if (!(value is TensorBase)) + { + throw new NotSupportedException("The inference value " + nameof(value) + " is not of a supported type"); + } + + var tensorBase = value as TensorBase; + var typeInfo = tensorBase.GetTypeInfo(); + if (typeInfo == null) + { + throw new OnnxRuntimeException(ErrorCode.RequirementNotRegistered, "BUG Check"); + } + + MemoryHandle memHandle = default; + OrtValue ortValue = null; + int dataBufferLength = 0; + long[] shape = null; + int rank = 0; + + TensorElementType elType = typeInfo.ElementType; + var typeSize = typeInfo.TypeSize; + if (typeInfo.IsString) + { + ortValue = CreateStringTensor(value as Tensor); + memHandle = default; + } + else + { + switch (elType) + { + case TensorElementType.Float: + PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + out shape, out rank); + break; + case TensorElementType.Double: + PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + out shape, out rank); + break; + case TensorElementType.Int32: + PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + out shape, out rank); + break; + case TensorElementType.UInt32: + PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + out shape, out rank); + break; + case TensorElementType.Int64: + PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + out shape, out rank); + break; + case TensorElementType.UInt64: + PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + out shape, out rank); + break; + case TensorElementType.Int16: + PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + out shape, out rank); + break; + + case TensorElementType.UInt16: + PinAsTensor(value as Tensor, elType, typeSize, + out memHandle, out dataBufferLength, + out shape, out rank); + + break; + case TensorElementType.UInt8: + PinAsTensor(value as Tensor, elType, typeSize, + out memHandle, out dataBufferLength, + out shape, out rank); + break; + case TensorElementType.Int8: + PinAsTensor(value as Tensor, elType, typeSize, + out memHandle, out dataBufferLength, + out shape, out rank); + break; + case TensorElementType.Bool: + PinAsTensor(value as Tensor, elType, typeSize, + out memHandle, out dataBufferLength, + out shape, out rank); + break; + default: + throw new NotSupportedException("Element type: " + elType + " is not of a supported type"); + } + + try + { + IntPtr dataBufferPointer = IntPtr.Zero; + unsafe + { + dataBufferPointer = (IntPtr)memHandle.Pointer; + } + + IntPtr nativeValue; + NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateTensorWithDataAsOrtValue( + OrtMemoryInfo.DefaultInstance.Pointer, + dataBufferPointer, + (UIntPtr)(dataBufferLength), + shape, + (UIntPtr)rank, + elType, + out nativeValue)); + + ortValue = new OrtValue(nativeValue); + } + catch (Exception e) + { + memHandle.Dispose(); + throw e; + } + } + memoryHandle = memHandle; + elementType = elType; + return ortValue; + } + + private static void PinAsTensor( + Tensor tensor, + TensorElementType nativeElementType, + int elementSize, + out MemoryHandle pinnedHandle, + out int dataBufferLength, + out long[] shape, + out int rank) + { + if (tensor == 
null) + { + throw new OnnxRuntimeException(ErrorCode.Fail, "Cast to Tensor failed. BUG check!"); + } + + if (tensor.IsReversedStride) + { + //TODO: not sure how to support reverse stride. may be able to calculate the shape differently + throw new NotSupportedException(nameof(Tensor) + " of reverseStride is not supported"); + } + + DenseTensor dt = null; + if (tensor is DenseTensor) + { + dt = tensor as DenseTensor; + } + else + { + dt = tensor.ToDenseTensor(); + } + + pinnedHandle = dt.Buffer.Pin(); + dataBufferLength = dt.Buffer.Length * elementSize; + shape = new long[dt.Dimensions.Length]; + for (int i = 0; i < dt.Dimensions.Length; ++i) + { + shape[i] = dt.Dimensions[i]; + } + rank = dt.Rank; + } + + private static OrtValue CreateStringTensor(Tensor tensor) + { + int totalLength = 0; + for (int i = 0; i < tensor.Length; i++) + { + totalLength += System.Text.Encoding.UTF8.GetByteCount(tensor.GetValue(i)); + } + + long[] shape = new long[tensor.Dimensions.Length]; + for (int i = 0; i < tensor.Dimensions.Length; i++) + { + shape[i] = tensor.Dimensions[i]; + } + + // allocate the native tensor + IntPtr valueHandle = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtCreateTensorAsOrtValue( + OrtAllocator.DefaultInstance.Pointer, + shape, + (UIntPtr)(shape.Length), + TensorElementType.String, + out valueHandle + )); + + var ortValue = new OrtValue(valueHandle); + try + { + + // fill the native tensor, using GetValue(index) from the Tensor + var len = tensor.Length; + var nativeStrings = new IntPtr[len]; + using (var pinnedHandles = new DisposableList((int)len)) + { + for (int i = 0; i < len; i++) + { + var utf8str = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(tensor.GetValue(i)); + var gcHandle = GCHandle.Alloc(utf8str, GCHandleType.Pinned); + pinnedHandles.Add(new PinnedGCHandle(gcHandle)); + nativeStrings[i] = gcHandle.AddrOfPinnedObject(); + } + + using (var pinnedStrings = new PinnedGCHandle(GCHandle.Alloc(nativeStrings, GCHandleType.Pinned))) + NativeApiStatus.VerifySuccess(NativeMethods.OrtFillStringTensor(ortValue.Handle, nativeStrings, (UIntPtr)len)); + } + } + catch (OnnxRuntimeException e) + { + ortValue.Dispose(); + throw e; + } + return ortValue; + } + #region Disposable Support protected virtual void Dispose(bool disposing) { diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/Tensor.cs b/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/Tensor.cs index afbe054dbc00c..1e3ba0baa2ccf 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/Tensor.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/Tensors/Tensor.cs @@ -78,7 +78,7 @@ public BFloat16(ushort val) /// /// Helps typecasting. Holds primitive type information /// - internal class TensorTypeInfo + public class TensorTypeInfo { public TensorElementType ElementType { get; private set; } public int TypeSize { get; private set; } @@ -90,7 +90,7 @@ public TensorTypeInfo(TensorElementType elementType, int typeSize) } } - internal class TensorBase + public class TensorBase { private static readonly Dictionary typeInfoMap = new Dictionary() @@ -254,7 +254,7 @@ public static Tensor CreateFromDiagonal(Tensor diagonal, int offset) /// type contained within the Tensor. Typically a value type such as int, double, float, etc. [DebuggerDisplay("{GetArrayString(false)}")] // When we cross-compile for frameworks that expose ICloneable this must implement ICloneable as well. 
- public abstract class Tensor : IList, IList, IReadOnlyList, IStructuralComparable, IStructuralEquatable + public abstract class Tensor : TensorBase, IList, IList, IReadOnlyList, IStructuralComparable, IStructuralEquatable { internal static T Zero { @@ -388,7 +388,7 @@ internal static T One /// Initialize a 1-dimensional tensor of the specified length /// /// Size of the 1-dimensional tensor - protected Tensor(int length) + protected Tensor(int length) : base(typeof(T)) { dimensions = new[] { length }; strides = new[] { 1 }; @@ -401,7 +401,7 @@ protected Tensor(int length) /// /// An span of integers that represent the size of each dimension of the Tensor to create. /// False (default) to indicate that the first dimension is most major (farthest apart) and the last dimension is most minor (closest together): akin to row-major in a rank-2 tensor. True to indicate that the last dimension is most major (farthest apart) and the first dimension is most minor (closest together): akin to column-major in a rank-2 tensor. - protected Tensor(ReadOnlySpan dimensions, bool reverseStride) + protected Tensor(ReadOnlySpan dimensions, bool reverseStride) : base(typeof(T)) { if (dimensions.Length == 0) { @@ -431,7 +431,7 @@ protected Tensor(ReadOnlySpan dimensions, bool reverseStride) /// /// Array from which to derive dimensions. /// False (default) to indicate that the first dimension is most major (farthest apart) and the last dimension is most minor (closest together): akin to row-major in a rank-2 tensor. True to indicate that the last dimension is most major (farthest apart) and the first dimension is most minor (closest together): akin to column-major in a rank-2 tensor. - protected Tensor(Array fromArray, bool reverseStride) + protected Tensor(Array fromArray, bool reverseStride) : base(typeof(T)) { if (fromArray == null) { diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_api.h b/include/onnxruntime/core/session/onnxruntime_cxx_api.h index f87d4db9a153e..0169ea3d525a9 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_api.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_api.h @@ -337,6 +337,25 @@ struct Value : Base { void FillStringTensorElement(const char* s, size_t index); }; +// Represents native memory allocation +struct MemoryAllocation { + MemoryAllocation(OrtAllocator* allocator, void* p, size_t size); + ~MemoryAllocation(); + MemoryAllocation(const MemoryAllocation&) = delete; + MemoryAllocation& operator=(const MemoryAllocation&) = delete; + MemoryAllocation(MemoryAllocation&&); + MemoryAllocation& operator=(MemoryAllocation&&); + + void* get() { return p_; } + size_t size() const { return size_; } + + private: + + OrtAllocator* allocator_; + void* p_; + size_t size_; +}; + struct AllocatorWithDefaultOptions { AllocatorWithDefaultOptions(); @@ -344,6 +363,8 @@ struct AllocatorWithDefaultOptions { operator const OrtAllocator*() const { return p_; } void* Alloc(size_t size); + // The return value will own the allocation + MemoryAllocation GetAllocation(size_t size); void Free(void* p); const OrtMemoryInfo* GetInfo() const; @@ -385,6 +406,8 @@ struct Allocator : public Base { Allocator(const Session& session, const MemoryInfo&); void* Alloc(size_t size) const; + // The return value will own the allocation + MemoryAllocation GetAllocation(size_t size); void Free(void* p) const; UnownedMemoryInfo GetInfo() const; }; diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index 
1bfd0366103aa..c038de476e312 100644 --- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h +++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h @@ -48,6 +48,42 @@ struct TypeToTensorType { static constexpr ONNXTensorElementDataType t template <> struct TypeToTensorType { static constexpr ONNXTensorElementDataType type = ONNX_TENSOR_ELEMENT_DATA_TYPE_BOOL; }; +inline MemoryAllocation::MemoryAllocation(OrtAllocator* allocator, void* p, size_t size) + : allocator_(allocator), p_(p), size_(size) { +} + +inline MemoryAllocation::~MemoryAllocation() { + if (p_ != nullptr) { + GetApi().AllocatorFree(allocator_, p_); + } +} + +inline MemoryAllocation::MemoryAllocation(MemoryAllocation&& o) : + allocator_(nullptr), p_(nullptr), size_(0) { + *this = std::move(o); +} + +inline MemoryAllocation& MemoryAllocation::operator=(MemoryAllocation&& o) { + + OrtAllocator* alloc = nullptr; + void* p = nullptr; + size_t sz = 0; + + // Swap out this + std::swap(alloc, allocator_); + std::swap(p, p_); + std::swap(sz, size_); + + // Swap with incoming + std::swap(allocator_, o.allocator_); + std::swap(p_, o.p_); + std::swap(size_, o.size_); + + // Destroy this instance if needed + MemoryAllocation this_alloc(alloc, p, sz); + return *this; +} + inline AllocatorWithDefaultOptions::AllocatorWithDefaultOptions() { ThrowOnError(GetApi().GetAllocatorWithDefaultOptions(&p_)); } @@ -58,6 +94,13 @@ inline void* AllocatorWithDefaultOptions::Alloc(size_t size) { return out; } +inline MemoryAllocation Ort::AllocatorWithDefaultOptions::GetAllocation(size_t size) { + void* out; + ThrowOnError(GetApi().AllocatorAlloc(p_, size, &out)); + MemoryAllocation result(p_, out, size); + return result; +} + inline void AllocatorWithDefaultOptions::Free(void* p) { ThrowOnError(GetApi().AllocatorFree(p_, p)); } @@ -124,6 +167,13 @@ inline void* Allocator::Alloc(size_t size) const { return out; } +inline MemoryAllocation Ort::Allocator::GetAllocation(size_t size) { + void* out = nullptr; + ThrowOnError(GetApi().AllocatorAlloc(p_, size, &out)); + MemoryAllocation result(p_, out, size); + return result; +} + inline void Allocator::Free(void* p) const { ThrowOnError(GetApi().AllocatorFree(p_, p)); } diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc index 16cabc10f0443..4ef72367a8aab 100644 --- a/onnxruntime/test/shared_lib/test_inference.cc +++ b/onnxruntime/test/shared_lib/test_inference.cc @@ -408,6 +408,10 @@ TEST(CApiTest, get_allocator_cpu) { void* p = cpu_allocator.Alloc(1024); ASSERT_NE(p, nullptr); cpu_allocator.Free(p); + + auto mem_allocation = cpu_allocator.GetAllocation(1024); + ASSERT_NE(nullptr, mem_allocation.get()); + ASSERT_EQ(1024, mem_allocation.size()); } #ifdef USE_CUDA @@ -424,6 +428,10 @@ TEST(CApiTest, get_allocator_cuda) { void* p = cuda_allocator.Alloc(1024); ASSERT_NE(p, nullptr); cuda_allocator.Free(p); + + auto mem_allocation = cuda_allocator.GetAllocation(1024); + ASSERT_NE(nullptr, mem_allocation.get()); + ASSERT_EQ(1024, mem_allocation.size()); } #endif From df825e4625054cd8d101779c99b0e85a0dd0f97b Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 3 Aug 2020 15:40:24 -0700 Subject: [PATCH 32/39] Don't throw out of destructor --- include/onnxruntime/core/session/onnxruntime_cxx_inline.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h index c038de476e312..8f999892bb32a 100644 --- 
a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -54,7 +54,8 @@ inline MemoryAllocation::MemoryAllocation(OrtAllocator* allocator, void* p, size
 inline MemoryAllocation::~MemoryAllocation() {
   if (p_ != nullptr) {
-    GetApi().AllocatorFree(allocator_, p_);
+    // We do not throw out of destructor
+    static_cast<void>(GetApi().AllocatorFree(allocator_, p_));
   }
 }

From b76f3b9af97b6d0ac0e76aeccee62f57c7fb7c19 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov
Date: Mon, 3 Aug 2020 18:45:06 -0700
Subject: [PATCH 33/39] Fix CI issues.

---
 include/onnxruntime/core/session/onnxruntime_cxx_inline.h | 3 ++-
 onnxruntime/test/shared_lib/test_inference.cc | 4 ++--
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
index 8f999892bb32a..7e9d58b26bfbe 100644
--- a/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
+++ b/include/onnxruntime/core/session/onnxruntime_cxx_inline.h
@@ -55,7 +55,8 @@ inline MemoryAllocation::MemoryAllocation(OrtAllocator* allocator, void* p, size
 inline MemoryAllocation::~MemoryAllocation() {
   if (p_ != nullptr) {
     // We do not throw out of destructor
-    static_cast<void>(GetApi().AllocatorFree(allocator_, p_));
+    auto ret = GetApi().AllocatorFree(allocator_, p_);
+    static_cast<void>(ret);
   }
 }

diff --git a/onnxruntime/test/shared_lib/test_inference.cc b/onnxruntime/test/shared_lib/test_inference.cc
index 4ef72367a8aab..a1078c924fce6 100644
--- a/onnxruntime/test/shared_lib/test_inference.cc
+++ b/onnxruntime/test/shared_lib/test_inference.cc
@@ -411,7 +411,7 @@ TEST(CApiTest, get_allocator_cpu) {
   auto mem_allocation = cpu_allocator.GetAllocation(1024);
   ASSERT_NE(nullptr, mem_allocation.get());
-  ASSERT_EQ(1024, mem_allocation.size());
+  ASSERT_EQ(1024U, mem_allocation.size());
 }
 #ifdef USE_CUDA
@@ -431,7 +431,7 @@ TEST(CApiTest, get_allocator_cuda) {
   auto mem_allocation = cuda_allocator.GetAllocation(1024);
   ASSERT_NE(nullptr, mem_allocation.get());
-  ASSERT_EQ(1024, mem_allocation.size());
+  ASSERT_EQ(1024U, mem_allocation.size());
 }
 #endif

From a755a08c3663789dd0aa2a9e19a0386974e9ccf4 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov
Date: Tue, 4 Aug 2020 15:32:40 -0700
Subject: [PATCH 34/39] Refactor inferencing Run() methods.

---
 .../DisposableNamedOnnxValue.cs | 53 +-
 .../FixedBufferOnnxValue.cs | 12 +-
 .../InferenceSession.cs | 569 +++++++-----------
 .../NamedOnnxValue.cs | 15 +-
 .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 4 +-
 .../Microsoft.ML.OnnxRuntime/OrtAllocator.cs | 15 +-
 .../src/Microsoft.ML.OnnxRuntime/OrtValue.cs | 41 +-
 .../Microsoft.ML.OnnxRuntime/RunOptions.cs | 11 +-
 .../SessionOptions.cs | 13 +-
 9 files changed, 278 insertions(+), 455 deletions(-)

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs
index 399d2bf33ff0a..a5232f91c1ac3 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs
@@ -72,37 +72,19 @@ private DisposableNamedOnnxValue(string name, Object value, OnnxValueType onnxVa
 ///
 /// Overrides the base class method. Since the instance already has access to the
- /// underlying OrtValue handle (if this instance hasn't been disposed), it just assigns
+ /// underlying OrtValue handle, it returns an instance of OrtValue that does not own the raw handle
 /// that to the output onnxValue.
With respect to pinnedMemoryHandle, it has no operation /// to do, as this class doesn't maintain a managed buffer. It doesn't have to maintain it /// as it already is associated with the object of interest (native OrtValue) /// - /// /// - /// - internal override void ToNativeOnnxValue(out IntPtr onnxValue, out MemoryHandle pinnedMemoryHandle, out bool disposeOnnxValueAfterUse) + internal override OrtValue ToOrtValue(out MemoryHandle? pinnedMemoryHandle) { - // Make sure that this instance hasn't been disposed yet - if (disposedValue) - { - throw new ObjectDisposedException(nameof(DisposableNamedOnnxValue), - "This instance of DisposableNamedOnnxValue has already been disposed"); - } - - // If not already disposed, _nativeMemoryManager can only be null - // for Maps and SequenceTensors - if (_nativeMemoryManager == null) - { - throw new NotSupportedException("Use of Maps and SequenceTensors is not yet supported"); - } - - // Assign the onnxValue by querying this instance's NativeOnnxTensorMemory instance - onnxValue = _nativeMemoryManager.Handle; - // PinnedMemoryHandle holds the default value as DisposableNamedOnnxValue // doesn't hold any managed buffer (that needs to be pinned) - pinnedMemoryHandle = default; - disposeOnnxValueAfterUse = false; + pinnedMemoryHandle = null; + // Assign the onnxValue by querying this instance's NativeOnnxTensorMemory instance + return new OrtValue(_nativeMemoryManager.Handle, false); } internal static DisposableNamedOnnxValue CreateTensorFromOnnxValue(string name, IntPtr nativeOnnxValue) @@ -170,9 +152,11 @@ internal static DisposableNamedOnnxValue CreateTensorFromOnnxValue(string name, return result; } - internal static DisposableNamedOnnxValue CreateFromOnnxValue(string name, IntPtr nativeOnnxValue) + internal static DisposableNamedOnnxValue CreateFromOrtValue(string name, OrtValue ortValue) { - return CreateFromOnnxValue(name, nativeOnnxValue, OrtAllocator.DefaultInstance); + var result = CreateFromOnnxValue(name, ortValue.Handle, OrtAllocator.DefaultInstance); + ortValue.Disown(); + return result; } internal static DisposableNamedOnnxValue CreateFromOnnxValue(string name, IntPtr nativeOnnxValue, OrtAllocator allocator) @@ -283,29 +267,20 @@ private static DisposableNamedOnnxValue DisposableNamedOnnxValueFromNativeMap public static FixedBufferOnnxValue CreateFromTensor(Tensor value) { - var ortValue = OrtValue.CreateFromTensorObject(value, out MemoryHandle pinnedMemoryHandle, out TensorElementType elementType); - return new FixedBufferOnnxValue(pinnedMemoryHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType); + MemoryHandle? 
memHandle; + var ortValue = OrtValue.CreateFromTensorObject(value, out memHandle, out TensorElementType elementType); + if (memHandle.HasValue) + { + return new FixedBufferOnnxValue((MemoryHandle)memHandle, ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType); + } + else + { + return new FixedBufferOnnxValue(default(MemoryHandle), ortValue, OnnxValueType.ONNX_TYPE_TENSOR, elementType); + } } #region IDisposable Support diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs index 9387d2735deec..fd5a53f2cf1e6 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs @@ -7,6 +7,7 @@ using System.IO; using System.Linq; using Microsoft.ML.OnnxRuntime.Tensors; +using System.Buffers; namespace Microsoft.ML.OnnxRuntime { @@ -129,77 +130,15 @@ public IDisposableReadOnlyCollection Run(IReadOnlyColl /// Output Tensors in a Collection of NamedOnnxValue. User must dispose the output. public IDisposableReadOnlyCollection Run(IReadOnlyCollection inputs, IReadOnlyCollection outputNames, RunOptions options) { - // prepare inputs - var inputNamesArray = new string[inputs.Count]; - var inputValuesArray = new IntPtr[inputs.Count]; - var pinnedInputBufferHandles = new System.Buffers.MemoryHandle[inputs.Count]; - var disposeInputs = new bool[inputs.Count]; - - int inputIndex = 0; - foreach (var input in inputs) + using (var cleanupList = new DisposableList()) { - inputNamesArray[inputIndex] = input.Name; + var inputNamesArray = ConvertNamesToUtf8(inputs, v => v.Name, cleanupList); + var inputValuesArray = GetOrtValuesHandles(inputs, cleanupList); + var outputNamesArray = ConvertNamesToUtf8(outputNames, n => n, cleanupList); - // create Tensor from the input if feasible, else throw notsupported exception for now - input.ToNativeOnnxValue( - out inputValuesArray[inputIndex], - out pinnedInputBufferHandles[inputIndex], - out disposeInputs[inputIndex]); - - inputIndex++; - } - - // prepare outputs - string[] outputNamesArray = outputNames as string[] ?? outputNames.ToArray(); - IntPtr[] outputValuesArray = new IntPtr[outputNames.Count]; - - IntPtr status = NativeMethods.OrtRun( - _nativeHandle, - options.Handle, - inputNamesArray, - inputValuesArray, - (UIntPtr)inputs.Count, - outputNamesArray, - (UIntPtr)outputNames.Count, - outputValuesArray /* Empty array is passed in to receive output OrtValue pointers */ - ); - - try - { - NativeApiStatus.VerifySuccess(status); - var result = new DisposableList(outputValuesArray.Length); - for (int i = 0; i < outputValuesArray.Length; i++) - { - result.Add(DisposableNamedOnnxValue.CreateFromOnnxValue(outputNamesArray[i], outputValuesArray[i])); - } - - return result; + var ortValues = RunImpl(options, inputNamesArray, inputValuesArray, outputNamesArray, cleanupList); + return CreateDisposableResult(ortValues, outputNames); } - catch (OnnxRuntimeException e) - { - //clean up the individual output tensors if it is not null; - for (int i = 0; i < outputValuesArray.Length; i++) - { - if (outputValuesArray[i] != IntPtr.Zero) - { - NativeMethods.OrtReleaseValue(outputValuesArray[i]); - } - } - throw e; - } - finally - { - for (int i = 0; i < inputs.Count; i++) - { - if (disposeInputs[i]) - { - NativeMethods.OrtReleaseValue(inputValuesArray[i]); // For elementary type Tensors, this should not release the buffer, but should delete the native tensor object. 
- // For string tensors, this releases the native memory allocated for the tensor, including the buffer
-                        pinnedInputBufferHandles[i].Dispose();
-                    }
-                }
-            }
-        }

        ///
@@ -251,56 +190,22 @@ public IDisposableReadOnlyCollection<DisposableNamedOnnxValue> Run(
                throw new ArgumentException($"Length of {nameof(inputNames)} ({inputNames.Count}) must match that of {nameof(inputValues)} ({inputValues.Count}).");
            }

-            // prepare inputs
-            string[] inputNamesArray = inputNames as string[] ?? inputNames.ToArray();
-            IntPtr[] inputValuesArray = new IntPtr[inputNames.Count];
-            int inputIndex = 0;
-            foreach (var input in inputValues)
+            using (var cleanupList = new DisposableList<IDisposable>())
            {
-                inputValuesArray[inputIndex] = input.Value.Handle;
+                var inputNamesArray = ConvertNamesToUtf8(inputNames, n => n, cleanupList);
+                var outputNamesArray = ConvertNamesToUtf8(outputNames, n => n, cleanupList);

-                inputIndex++;
-            }
-
-            // prepare outputs
-            string[] outputNamesArray = outputNames as string[] ?? outputNames.ToArray();
-            IntPtr[] outputValuesArray = new IntPtr[outputNames.Count];
-
-            IntPtr status = NativeMethods.OrtRun(
-                _nativeHandle,
-                options.Handle,
-                inputNamesArray,
-                inputValuesArray,
-                (UIntPtr)inputNames.Count,
-                outputNamesArray,
-                (UIntPtr)outputNames.Count,
-                outputValuesArray /* Empty array is passed in to receive output OrtValue pointers */
-                );
-
-            try
-            {
-                NativeApiStatus.VerifySuccess(status);
-                var result = new DisposableList<DisposableNamedOnnxValue>(outputValuesArray.Length);
-                for (int i = 0; i < outputValuesArray.Length; i++)
+                int inputIndex = 0;
+                IntPtr[] inputValuesArray = new IntPtr[inputValues.Count];
+                foreach (var input in inputValues)
                {
-                    result.Add(DisposableNamedOnnxValue.CreateFromOnnxValue(outputNamesArray[i], outputValuesArray[i]));
+                    inputValuesArray[inputIndex] = input.Value.Handle;
+                    inputIndex++;
                }

-                return result;
+                var ortValues = RunImpl(options, inputNamesArray, inputValuesArray, outputNamesArray, cleanupList);
+                return CreateDisposableResult(ortValues, outputNames);
            }
-            catch (OnnxRuntimeException e)
-            {
-                //clean up the individual output tensors if it is not null;
-                for (uint i = 0; i < outputValuesArray.Length; i++)
-                {
-                    if (outputValuesArray[i] != IntPtr.Zero)
-                    {
-                        NativeMethods.OrtReleaseValue(outputValuesArray[i]);
-                    }
-                }
-                throw e;
-            }
-        }

        ///
@@ -347,48 +252,49 @@ public void Run(
                throw new ArgumentException($"Length of {nameof(outputNames)} ({outputNames.Count}) must match that of {nameof(outputValues)} ({outputValues.Count}).");
            }

-            // prepare inputs
-            string[] inputNamesArray = inputNames as string[] ?? inputNames.ToArray();
-            IntPtr[] inputValuesArray = new IntPtr[inputNames.Count];
-            int inputIndex = 0;
-            foreach (var input in inputValues)
+            using (var cleanupList = new DisposableList<IDisposable>())
            {
-                inputValuesArray[inputIndex] = input.Value.Handle;
-
-                inputIndex++;
-            }
+                // prepare inputs
+                var inputNamesArray = ConvertNamesToUtf8(inputNames, n => n, cleanupList);
+                var outputNamesArray = ConvertNamesToUtf8(outputNames, n => n, cleanupList);

-            // prepare outputs
-            string[] outputNamesArray = outputNames as string[] ??
outputNames.ToArray(); - IntPtr[] outputValuesArray = new IntPtr[outputNames.Count]; - int outputIndex = 0; - foreach (var output in outputValues) - { - if (output.ElementType == Tensors.TensorElementType.String) + IntPtr[] inputValuesArray = new IntPtr[inputNames.Count]; + int inputIndex = 0; + foreach (var input in inputValues) { - throw new NotSupportedException("Using string type FixedBufferOnnxValue in outputs is not supported."); + inputValuesArray[inputIndex] = input.Value.Handle; + inputIndex++; } - outputValuesArray[outputIndex] = output.Value.Handle; + // prepare outputs + IntPtr[] outputValuesArray = new IntPtr[outputNames.Count]; + int outputIndex = 0; + foreach (var output in outputValues) + { + if (output.ElementType == Tensors.TensorElementType.String) + { + throw new NotSupportedException("Using string type FixedBufferOnnxValue in outputs is not supported."); + } - outputIndex++; - } + outputValuesArray[outputIndex] = output.Value.Handle; + outputIndex++; + } - IntPtr status = NativeMethods.OrtRun( - _nativeHandle, - options.Handle, - inputNamesArray, - inputValuesArray, - (UIntPtr)inputNames.Count, - outputNamesArray, - (UIntPtr)outputNames.Count, - outputValuesArray /* pointers to Pre-allocated OrtValue instances */ - ); - - NativeApiStatus.VerifySuccess(status); + NativeApiStatus.VerifySuccess(NativeMethods.OrtRun( + _nativeHandle, + options.Handle, + inputNamesArray, + inputValuesArray, + (UIntPtr)inputNames.Count, + outputNamesArray, + (UIntPtr)outputNames.Count, + outputValuesArray /* pointers to Pre-allocated OrtValue instances */ + )); + } } /// + /// /// Runs the loaded model for the given inputs and outputs. /// /// Outputs need to be created with correct type and dimension to receive the fetched data. @@ -403,8 +309,9 @@ public void Run( } /// + /// /// Runs the loaded model for the given inputs and outputs. Uses the given RunOptions for this run. - /// + /// /// Outputs need to be created with correct type and dimension to receive the fetched data. /// /// Specify a collection of that indicates the input values. 
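A note on the pattern shared by all of these rewritten Run() overloads: every temporary native resource, whether a pinned UTF-8 name buffer or a temporary OrtValue, is registered with one DisposableList, and a single using statement releases all of them whether OrtRun succeeds or throws, replacing the hand-written try/finally blocks being deleted here. A minimal sketch of the idea, assuming DisposableList<IDisposable> disposes each of its elements on Dispose(); the name "input_0" is illustrative:

    // requires: using System; using System.Runtime.InteropServices;
    using (var cleanupList = new DisposableList<IDisposable>())
    {
        // Pin the zero-terminated UTF-8 copy of the name for the duration of the native call.
        byte[] utf8Name = NativeOnnxValueHelper.StringToZeroTerminatedUtf8("input_0");
        var pinnedName = new PinnedGCHandle(GCHandle.Alloc(utf8Name, GCHandleType.Pinned));
        cleanupList.Add(pinnedName);
        IntPtr namePtr = pinnedName.Pointer;
        // ... gather OrtValue handles the same way, then call NativeMethods.OrtRun ...
    }   // single cleanup point, reached on success and on exception alike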
@@ -415,49 +322,14 @@ public void Run( IReadOnlyCollection outputs, RunOptions options) { - var inputNamesArray = new string[inputs.Count]; - var inputValuesArray = new IntPtr[inputs.Count]; - var pinnedInputBufferHandles = new System.Buffers.MemoryHandle[inputs.Count]; - var disposeInputs = new bool[inputs.Count]; - - var outputNamesArray = new string[outputs.Count]; - var outputValuesArray = new IntPtr[outputs.Count]; - var pinnedOutputBufferHandles = new System.Buffers.MemoryHandle[outputs.Count]; - var disposeOutputs = new bool[outputs.Count]; - - try + using(var cleanupList = new DisposableList()) { - // prepare inputs - int inputIndex = 0; - foreach (var input in inputs) - { - inputNamesArray[inputIndex] = input.Name; - - // create native OrtValue from the input if feasible, else throw notsupported exception for now - input.ToNativeOnnxValue( - out inputValuesArray[inputIndex], - out pinnedInputBufferHandles[inputIndex], - out disposeInputs[inputIndex]); - - inputIndex++; - } + var inputNamesArray = ConvertNamesToUtf8(inputs, i => i.Name, cleanupList); + var outputNamesArray = ConvertNamesToUtf8(outputs, o => o.Name, cleanupList); + var inputValuesArray = GetOrtValuesHandles(inputs, cleanupList); + var outputValuesArray = GetOrtValuesHandles(outputs, cleanupList); - // prepare outputs - int outputIndex = 0; - foreach (var output in outputs) - { - outputNamesArray[outputIndex] = output.Name; - - // create native OrtValue from the output if feasible, else throw notsupported exception for now - output.ToNativeOnnxValue( - out outputValuesArray[outputIndex], - out pinnedOutputBufferHandles[outputIndex], - out disposeOutputs[outputIndex]); - - outputIndex++; - } - - IntPtr status = NativeMethods.OrtRun( + NativeApiStatus.VerifySuccess(NativeMethods.OrtRun( _nativeHandle, options.Handle, inputNamesArray, @@ -466,31 +338,7 @@ public void Run( outputNamesArray, (UIntPtr)outputs.Count, outputValuesArray /* pointers to Pre-allocated OrtValue instances */ - ); - - NativeApiStatus.VerifySuccess(status); - } - finally - { - for (int i = 0; i < inputs.Count; i++) - { - if (disposeInputs[i]) - { - NativeMethods.OrtReleaseValue(inputValuesArray[i]); // For elementary type Tensors, this should not release the buffer, but should delete the native tensor object. - // For string tensors, this releases the native memory allocated for the tensor, including the buffer - pinnedInputBufferHandles[i].Dispose(); - } - } - - for (int i = 0; i < outputs.Count; i++) - { - if (disposeOutputs[i]) - { - NativeMethods.OrtReleaseValue(outputValuesArray[i]); // For elementary type Tensors, this should not release the buffer, but should delete the native tensor object. 
- // For string tensors, this releases the native memory allocated for the tensor, including the buffer - pinnedOutputBufferHandles[i].Dispose(); - } - } + )); } } @@ -530,32 +378,15 @@ public void Run( throw new ArgumentException($"Length of {nameof(outputNames)} ({outputNames.Count}) must match that of {nameof(outputValues)} ({outputValues.Count})."); } - - var inputNamesArray = new string[inputs.Count]; - var inputValuesArray = new IntPtr[inputs.Count]; - var pinnedInputBufferHandles = new System.Buffers.MemoryHandle[inputs.Count]; - var disposeInputs = new bool[inputs.Count]; - - try + using(var cleanupList = new DisposableList()) { // prepare inputs - int inputIndex = 0; - foreach (var input in inputs) - { - inputNamesArray[inputIndex] = input.Name; - - // create native OrtValue from the input if feasible, else throw notsupported exception for now - input.ToNativeOnnxValue( - out inputValuesArray[inputIndex], - out pinnedInputBufferHandles[inputIndex], - out disposeInputs[inputIndex]); - - inputIndex++; - } + var inputNamesArray = ConvertNamesToUtf8(inputs, i => i.Name, cleanupList); + var inputValuesArray = GetOrtValuesHandles(inputs, cleanupList); // prepare outputs - string[] outputNamesArray = outputNames as string[] ?? outputNames.ToArray(); - IntPtr[] outputValuesArray = new IntPtr[outputNames.Count]; + var outputNamesArray = ConvertNamesToUtf8(outputNames, n => n, cleanupList); + IntPtr[] outputValuesArray = new IntPtr[outputValues.Count]; int outputIndex = 0; foreach (var output in outputValues) { @@ -563,13 +394,11 @@ public void Run( { throw new NotSupportedException("Using string type FixedBufferOnnxValue in outputs is not supported."); } - outputValuesArray[outputIndex] = output.Value.Handle; - outputIndex++; } - IntPtr status = NativeMethods.OrtRun( + NativeApiStatus.VerifySuccess(NativeMethods.OrtRun( _nativeHandle, options.Handle, inputNamesArray, @@ -578,28 +407,14 @@ public void Run( outputNamesArray, (UIntPtr)outputNames.Count, outputValuesArray /* pointers to Pre-allocated OrtValue instances */ - ); - - - NativeApiStatus.VerifySuccess(status); - } - finally - { - for (int i = 0; i < inputs.Count; i++) - { - if (disposeInputs[i]) - { - NativeMethods.OrtReleaseValue(inputValuesArray[i]); // For elementary type Tensors, this should not release the buffer, but should delete the native tensor object. - // For string tensors, this releases the native memory allocated for the tensor, including the buffer - pinnedInputBufferHandles[i].Dispose(); - } - } + )); } } /// + /// /// Runs the loaded model for the given inputs and outputs. - /// + /// /// Outputs need to be created with correct type and dimension to receive the fetched data. /// /// Specify a collection of string that indicates the input names. Should match . @@ -614,8 +429,9 @@ public void Run( } /// + /// /// Runs the loaded model for the given inputs and outputs. Uses the given RunOptions for this run. - /// + /// /// Outputs need to be created with correct type and dimension to receive the fetched data. /// /// Specify a collection of string that indicates the input names. Should match . 
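For orientation, a typical call into the overloads above looks roughly like the sketch below. The names "x" and "y" and the tensor shapes are hypothetical, and `session` is assumed to be an already constructed InferenceSession; FixedBufferOnnxValue.CreateFromTensor is the factory introduced earlier in this patch series:

    // Hypothetical usage: reuse caller-owned buffers across Run() calls.
    var input = new DenseTensor<float>(new[] { 1, 3 });
    var output = new DenseTensor<float>(new[] { 1, 2 });
    using (var inputValue = FixedBufferOnnxValue.CreateFromTensor(input))
    using (var outputValue = FixedBufferOnnxValue.CreateFromTensor(output))
    {
        // Fetched results land directly in the pre-allocated output tensor;
        // nothing is allocated per call.
        session.Run(new[] { "x" }, new[] { inputValue },
                    new[] { "y" }, new[] { outputValue });
    }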
@@ -633,41 +449,23 @@ public void Run( throw new ArgumentException($"Length of {nameof(inputNames)} ({inputNames.Count}) must match that of {nameof(inputValues)} ({inputValues.Count})."); } - var outputNamesArray = new string[outputs.Count]; - var outputValuesArray = new IntPtr[outputs.Count]; - var pinnedOutputBufferHandles = new System.Buffers.MemoryHandle[outputs.Count]; - var disposeOutputs = new bool[outputs.Count]; - - try + using(var cleanupList = new DisposableList()) { // prepare inputs - string[] inputNamesArray = inputNames as string[] ?? inputNames.ToArray(); - IntPtr[] inputValuesArray = new IntPtr[inputNames.Count]; + var inputNamesArray = ConvertNamesToUtf8(inputNames, n => n, cleanupList); + IntPtr[] inputValuesArray = new IntPtr[inputValues.Count]; int inputIndex = 0; foreach (var input in inputValues) { inputValuesArray[inputIndex] = input.Value.Handle; - inputIndex++; } // prepare outputs + var outputNamesArray = ConvertNamesToUtf8(outputs, o => o.Name, cleanupList); + var outputValuesArray = GetOrtValuesHandles(outputs, cleanupList); - int outputIndex = 0; - foreach (var output in outputs) - { - outputNamesArray[outputIndex] = output.Name; - - // create native OrtValue from the output if feasible, else throw notsupported exception for now - output.ToNativeOnnxValue( - out outputValuesArray[outputIndex], - out pinnedOutputBufferHandles[outputIndex], - out disposeOutputs[outputIndex]); - - outputIndex++; - } - - IntPtr status = NativeMethods.OrtRun( + NativeApiStatus.VerifySuccess(NativeMethods.OrtRun( _nativeHandle, options.Handle, inputNamesArray, @@ -676,22 +474,7 @@ public void Run( outputNamesArray, (UIntPtr)outputs.Count, outputValuesArray /* pointers to Pre-allocated OrtValue instances */ - ); - - - NativeApiStatus.VerifySuccess(status); - } - finally - { - for (int i = 0; i < outputs.Count; i++) - { - if (disposeOutputs[i]) - { - NativeMethods.OrtReleaseValue(outputValuesArray[i]); // For elementary type Tensors, this should not release the buffer, but should delete the native tensor object. - // For string tensors, this releases the native memory allocated for the tensor, including the buffer - pinnedOutputBufferHandles[i].Dispose(); - } - } + )); } } @@ -758,7 +541,8 @@ public IDisposableReadOnlyCollection RunWithBindingAnd var ortValue = ortValues.ElementAt(i); result.Add(DisposableNamedOnnxValue.CreateTensorFromOnnxValue(outputNames[i], ortValue.Handle)); // We transferred ownership of the handle. - // Make sure it is not disposed here + // But ortValues will still call Dispose() if there are other parts of the + // object needs disposing. 
ortValue.Disown();
 }
 }
 catch(Exception e)
@@ -786,6 +570,104 @@ public string EndProfiling()
 }
 }

+        // Delegate for string extraction from an arbitrary input/output object
+        private delegate string NameExtractor<TInput>(TInput input);
+
+        ///
+        /// Run helper
+        ///
+        /// names to convert to zero terminated utf8 and pin
+        /// list to add pinned memory to for later disposal
+        ///
+        private IntPtr[] ConvertNamesToUtf8<T>(IReadOnlyCollection<T> inputs, NameExtractor<T> extractor,
+            DisposableList<IDisposable> cleanupList)
+        {
+            var result = new IntPtr[inputs.Count];
+            for (int i = 0; i < inputs.Count; ++i)
+            {
+                var name = extractor(inputs.ElementAt(i));
+                var utf8Name = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(name);
+                var pinnedHandle = new PinnedGCHandle(GCHandle.Alloc(utf8Name, GCHandleType.Pinned));
+                result[i] = pinnedHandle.Pointer;
+                cleanupList.Add(pinnedHandle);
+            }
+            return result;
+        }
+
+        ///
+        /// This function obtains OrtValue handles for the given values.
+        /// A plain NamedOnnxValue does not hold a native OrtValue, so calling ToOrtValue
+        /// creates a new OrtValue instance that must be disposed.
+        /// The derived DisposableNamedOnnxValue already owns a native OrtValue and returns it
+        /// without creating a new one.
+        ///
+        ///
+        ///
+        ///
+        private IntPtr[] GetOrtValuesHandles(IReadOnlyCollection<NamedOnnxValue> values, DisposableList<IDisposable> cleanupList)
+        {
+            IntPtr[] result = new IntPtr[values.Count];
+            for (int inputIndex = 0; inputIndex < values.Count; ++inputIndex)
+            {
+                var input = values.ElementAt(inputIndex);
+                MemoryHandle? memHandle;
+                var ortValue = input.ToOrtValue(out memHandle);
+                cleanupList.Add(ortValue);
+                if (memHandle.HasValue)
+                {
+                    cleanupList.Add(memHandle);
+                }
+                result[inputIndex] = ortValue.Handle;
+            }
+            return result;
+        }
+
+        private DisposableList<OrtValue> RunImpl(RunOptions options, IntPtr[] inputNames, IntPtr[] inputValues, IntPtr[] outputNames,
+            DisposableList<IDisposable> cleanupList)
+        {
+            var ortValues = new DisposableList<OrtValue>(outputNames.Length);
+            cleanupList.Add(ortValues);
+
+            IntPtr[] outputValuesArray = new IntPtr[outputNames.Length];
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtRun(
+                _nativeHandle,
+                options.Handle,
+                inputNames,
+                inputValues,
+                (UIntPtr)inputNames.Length,
+                outputNames,
+                (UIntPtr)outputNames.Length,
+                outputValuesArray /* Empty array is passed in to receive output OrtValue pointers */
+                ));
+
+            foreach (var v in outputValuesArray)
+            {
+                ortValues.Add(new OrtValue(v));
+            }
+            return ortValues;
+        }
+
+        IDisposableReadOnlyCollection<DisposableNamedOnnxValue> CreateDisposableResult(List<OrtValue> ortValues,
+            IReadOnlyCollection<string> outputNames)
+        {
+            var result = new DisposableList<DisposableNamedOnnxValue>(outputNames.Count);
+            try
+            {
+                for (int i = 0; i < ortValues.Count; i++)
+                {
+                    var ortValue = ortValues[i];
+                    result.Add(DisposableNamedOnnxValue.CreateFromOrtValue(outputNames.ElementAt(i), ortValue));
+                }
+            }
+            catch (OnnxRuntimeException e)
+            {
+                result.Dispose();
+                throw e;
+            }
+            return result;
+        }
+
 //TODO: kept internal until implemented
 internal ModelMetadata ModelMetadata
 {
@@ -881,128 +763,97 @@ private void InitWithSessionHandle(IntPtr session, SessionOptions options)
 private string GetOutputName(ulong index)
 {
+            var allocator = OrtAllocator.DefaultInstance;
            IntPtr nameHandle = IntPtr.Zero;
            string str = null;
+            NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetOutputName(
+                _nativeHandle,
+                (UIntPtr)index,
+                allocator.Pointer,
+                out nameHandle));
-            try
+            using (var ortAllocation = new OrtMemoryAllocation(allocator, nameHandle, 0))
            {
-                NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetOutputName(
-
_nativeHandle, - (UIntPtr)index, - OrtAllocator.DefaultInstance.Pointer, - out nameHandle)); - str = NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle); } - finally - { - if (nameHandle != IntPtr.Zero) - { - OrtAllocator.DefaultInstance.FreeMemory(nameHandle); - } - } return str; } private string GetInputName(ulong index) { - IntPtr nameHandle = IntPtr.Zero; string str = null; + var allocator = OrtAllocator.DefaultInstance; + IntPtr nameHandle = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetInputName( + _nativeHandle, + (UIntPtr)index, + allocator.Pointer, + out nameHandle)); - try + using (var ortAllocation = new OrtMemoryAllocation(allocator, nameHandle, 0)) { - NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetInputName( - _nativeHandle, - (UIntPtr)index, - OrtAllocator.DefaultInstance.Pointer, - out nameHandle)); - str = NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle); } - finally - { - if (nameHandle != IntPtr.Zero) - { - OrtAllocator.DefaultInstance.FreeMemory(nameHandle); - } - } return str; } private string GetOverridableInitializerName(ulong index) { - IntPtr nameHandle = IntPtr.Zero; string str = null; - - try + var allocator = OrtAllocator.DefaultInstance; + IntPtr nameHandle = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetOverridableInitializerName( + _nativeHandle, + (UIntPtr)index, + allocator.Pointer, + out nameHandle)); + using(var ortAllocation = new OrtMemoryAllocation(allocator, nameHandle, 0)) { - NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetOverridableInitializerName( - _nativeHandle, - (UIntPtr)index, - OrtAllocator.DefaultInstance.Pointer, - out nameHandle)); - str = NativeOnnxValueHelper.StringFromNativeUtf8(nameHandle); } - finally - { - if (nameHandle != IntPtr.Zero) - { - OrtAllocator.DefaultInstance.FreeMemory(nameHandle); - } - } return str; } private NodeMetadata GetInputMetadata(ulong index) { IntPtr typeInfo = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetInputTypeInfo(_nativeHandle, (UIntPtr)index, out typeInfo)); try { - NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetInputTypeInfo(_nativeHandle, (UIntPtr)index, out typeInfo)); return GetMetadataFromTypeInfo(typeInfo); } finally { - if (typeInfo != IntPtr.Zero) - { - NativeMethods.OrtReleaseTypeInfo(typeInfo); - } + NativeMethods.OrtReleaseTypeInfo(typeInfo); } } private NodeMetadata GetOutputMetadata(ulong index) { IntPtr typeInfo = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetOutputTypeInfo(_nativeHandle, (UIntPtr)index, out typeInfo)); try { - NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetOutputTypeInfo(_nativeHandle, (UIntPtr)index, out typeInfo)); return GetMetadataFromTypeInfo(typeInfo); } finally { - if (typeInfo != IntPtr.Zero) - { - NativeMethods.OrtReleaseTypeInfo(typeInfo); - } + NativeMethods.OrtReleaseTypeInfo(typeInfo); } } private NodeMetadata GetOverridableInitializerMetadata(ulong index) { IntPtr typeInfo = IntPtr.Zero; + NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetOverridableInitializerTypeInfo(_nativeHandle, (UIntPtr)index, out typeInfo)); try { - NativeApiStatus.VerifySuccess(NativeMethods.OrtSessionGetOverridableInitializerTypeInfo(_nativeHandle, (UIntPtr)index, out typeInfo)); return GetMetadataFromTypeInfo(typeInfo); } finally { - if (typeInfo != IntPtr.Zero) - { - NativeMethods.OrtReleaseTypeInfo(typeInfo); - } + NativeMethods.OrtReleaseTypeInfo(typeInfo); } } @@ -1073,13 +924,7 @@ internal IntPtr Handle #endregion - 
#region destructors disposers - - - ~InferenceSession() - { - Dispose(false); - } + #region IDisposable/ no finalizers needed public void Dispose() { diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NamedOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NamedOnnxValue.cs index 029dcdcb03f00..4b991624e30a4 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NamedOnnxValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NamedOnnxValue.cs @@ -9,6 +9,10 @@ namespace Microsoft.ML.OnnxRuntime { + /// + /// The name of the class is a misnomer, it does not hold any + /// Onnx values + /// public class NamedOnnxValue { protected Object _value; @@ -66,16 +70,9 @@ public IDictionary AsDictionary() /// /// /// - internal virtual void ToNativeOnnxValue( - out IntPtr onnxValue, - out MemoryHandle pinnedMemoryHandle, - out bool disposeOnnxValueAfterUse) + internal virtual OrtValue ToOrtValue(out MemoryHandle? pinnedMemoryHandle) { - var ortValue = OrtValue.CreateFromTensorObject(_value, out pinnedMemoryHandle, out TensorElementType elementType); - onnxValue = ortValue.Disown(); - // Dispose any other parts if any - ortValue.Dispose(); - disposeOnnxValueAfterUse = true; + return OrtValue.CreateFromTensorObject(_value, out pinnedMemoryHandle, out TensorElementType elementType); } // may expose different types of getters in future diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index a58e8ba6e157e..95a4d03d9eb09 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -338,10 +338,10 @@ static NativeMethods() public delegate IntPtr /*(ONNStatus*)*/ DOrtRun( IntPtr /*(OrtSession*)*/ session, IntPtr /*(OrtSessionRunOptions*)*/ runOptions, // can be null to use the default options - string[] inputNames, + IntPtr[] inputNames, IntPtr[] /* (OrtValue*[])*/ inputValues, UIntPtr inputCount, - string[] outputNames, + IntPtr[] outputNames, UIntPtr outputCount, IntPtr[] outputValues /* An array of output value pointers. Array must be allocated by the caller */ ); diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs index f82f84e27e93d..426a4445e1605 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtAllocator.cs @@ -234,19 +234,6 @@ public class OrtMemoryAllocation : IDisposable { private OrtAllocator _allocator; - /// - /// Bind an arbitrary piece of native memory to the instance - /// The instance will not have the ownership of this memory. - /// - /// - /// - public OrtMemoryAllocation(IntPtr pointer, uint size) - { - _allocator = null; - Pointer = pointer; - Size = size; - } - /// /// This constructs an instance representing an native memory allocation. /// Typically returned by OrtAllocator.Allocate(). 
However, some APIs return @@ -257,7 +244,7 @@ public OrtMemoryAllocation(IntPtr pointer, uint size) /// /// /// - public OrtMemoryAllocation(OrtAllocator allocator, IntPtr pointer, uint size) + internal OrtMemoryAllocation(OrtAllocator allocator, IntPtr pointer, uint size) { _allocator = allocator; Pointer = pointer; diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs index 5460f62c24860..b10887dad5cc0 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs @@ -4,6 +4,7 @@ using Microsoft.ML.OnnxRuntime.Tensors; using System; using System.Buffers; +using System.Diagnostics; using System.Runtime.InteropServices; namespace Microsoft.ML.OnnxRuntime @@ -27,13 +28,26 @@ public class OrtValue : IDisposable /// Use factory methods to instantiate /// /// - internal OrtValue(IntPtr handle) + /// Default true, own the raw handle + internal OrtValue(IntPtr handle, bool owned = true) { Handle = handle; + IsOwned = owned; } internal IntPtr Handle { get; private set; } + internal bool IsOwned { get; private set; } + + #region NamedOnnxValue/DisposableOnnxValue accommodations + + // DisposableOnnxValue class owns Native handle to OrtValue + // NamedOnnxValue does not own anything but creates a new one + // which presents a fundamental semantic difference to ToOrtValue interface. + // + // We provide a way to relinquish ownership as well as return an instance of + // OrtValue that is still disposable but does not have ownership + /// /// This internal interface is used to transfer ownership elsewhere. /// This instance must still be disposed in case there are other native @@ -44,9 +58,12 @@ internal IntPtr Disown() { var handle = Handle; Handle = IntPtr.Zero; + IsOwned = false; return handle; } + #endregion + /// /// Factory method to construct an OrtValue of Tensor type on top of pre-allocated memory. /// This can be a piece of native memory allocated by OrtAllocator (possibly on a device) @@ -95,7 +112,8 @@ out ortValueHandle } /// - /// This is a factory method that ta + /// This is a factory method creates a native Onnxruntime OrtValue containing a tensor. + /// However, it re-uses managed memory if possible. /// /// Tensor object /// For all tensor types but string tensors we endevour to use managed memory @@ -103,7 +121,7 @@ out ortValueHandle /// /// discovered tensor element type /// - public static OrtValue CreateFromTensorObject(Object value, out MemoryHandle memoryHandle, + public static OrtValue CreateFromTensorObject(Object value, out MemoryHandle? memoryHandle, out TensorElementType elementType) { // Check if this is a Tensor @@ -119,7 +137,7 @@ public static OrtValue CreateFromTensorObject(Object value, out MemoryHandle mem throw new OnnxRuntimeException(ErrorCode.RequirementNotRegistered, "BUG Check"); } - MemoryHandle memHandle = default; + MemoryHandle? 
memHandle; OrtValue ortValue = null; int dataBufferLength = 0; long[] shape = null; @@ -130,7 +148,7 @@ public static OrtValue CreateFromTensorObject(Object value, out MemoryHandle mem if (typeInfo.IsString) { ortValue = CreateStringTensor(value as Tensor); - memHandle = default; + memHandle = null; } else { @@ -192,10 +210,11 @@ public static OrtValue CreateFromTensorObject(Object value, out MemoryHandle mem try { + Debug.Assert(memHandle.HasValue); IntPtr dataBufferPointer = IntPtr.Zero; unsafe { - dataBufferPointer = (IntPtr)memHandle.Pointer; + dataBufferPointer = (IntPtr)((MemoryHandle)memHandle).Pointer; } IntPtr nativeValue; @@ -212,7 +231,7 @@ public static OrtValue CreateFromTensorObject(Object value, out MemoryHandle mem } catch (Exception e) { - memHandle.Dispose(); + memHandle?.Dispose(); throw e; } } @@ -225,7 +244,7 @@ private static void PinAsTensor( Tensor tensor, TensorElementType nativeElementType, int elementSize, - out MemoryHandle pinnedHandle, + out MemoryHandle? pinnedHandle, out int dataBufferLength, out long[] shape, out int rank) @@ -320,9 +339,13 @@ protected virtual void Dispose(bool disposing) if (disposing) { // We have to surrender ownership to some legacy classes + // Or we never had that ownership to begin with if (Handle != IntPtr.Zero) { - NativeMethods.OrtReleaseValue(Handle); + if (IsOwned) + { + NativeMethods.OrtReleaseValue(Handle); + } // Prevent use after disposal Handle = IntPtr.Zero; } diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/RunOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/RunOptions.cs index 698cea3f2ad6a..3bd35e3f94700 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/RunOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/RunOptions.cs @@ -106,13 +106,7 @@ public bool Terminate private bool _terminate = false; //value set to default value of the C++ RunOptions - #region destructors disposers - - ~RunOptions() - { - Dispose(false); - } - + #region IDisposable public void Dispose() { @@ -125,9 +119,8 @@ protected virtual void Dispose(bool disposing) { if (disposing) { - // cleanup managed resources + NativeMethods.OrtReleaseRunOptions(_nativePtr); } - NativeMethods.OrtReleaseRunOptions(_nativePtr); } #endregion diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs index 0fe79c047520e..cc24b69ec7031 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/SessionOptions.cs @@ -168,7 +168,6 @@ public void RegisterCustomOpLibrary(string libraryPath) } #endregion - #region Public Properties internal IntPtr Handle { @@ -178,6 +177,7 @@ internal IntPtr Handle } } + #region Public Properties /// /// Enables the use of the memory allocation patterns in the first Run() call for subsequent runs. Default = true. /// @@ -447,12 +447,7 @@ private static bool CheckCudaExecutionProviderDLLs() #endregion - #region destructors disposers - - ~SessionOptions() - { - Dispose(false); - } + #region IDisposable public void Dispose() { @@ -464,9 +459,9 @@ protected virtual void Dispose(bool disposing) { if (disposing) { - // cleanup managed resources + NativeMethods.OrtReleaseSessionOptions(_nativePtr); + _nativePtr = IntPtr.Zero; } - NativeMethods.OrtReleaseSessionOptions(_nativePtr); } #endregion From dc0e73063be9eeebaf70ee47214bdaa396026c30 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Tue, 4 Aug 2020 15:42:44 -0700 Subject: [PATCH 35/39] Minor fixes. 
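Registers the temporary OrtValue with the cleanup list only after the pinned
memory handle it depends on, moves the IsOwned property next to Disown(),
and drops a stale ownership comment in the binding-based Run path. For
reference, a sketch of the ownership states an OrtValue wrapper can be in,
using the internal constructor as declared in OrtValue.cs; nativeHandle is a
placeholder for a handle obtained from the C API:

    OrtValue owned = new OrtValue(nativeHandle);        // owning: Dispose() releases the native value
    OrtValue view  = new OrtValue(owned.Handle, false); // non-owning: Dispose() only clears Handle
    IntPtr raw = owned.Disown();                        // ownership given away; owned releases nothing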
---
 csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs | 5 +----
 csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs | 6 +++---
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
index fd5a53f2cf1e6..2ae5d87c72a39 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs
@@ -540,9 +540,6 @@ public IDisposableReadOnlyCollection RunWithBindingAnd
 {
     var ortValue = ortValues.ElementAt(i);
     result.Add(DisposableNamedOnnxValue.CreateTensorFromOnnxValue(outputNames[i], ortValue.Handle));
-                    // We transferred ownership of the handle.
-                    // But ortValues will still call Dispose() if there are other parts of the
-                    // object needs disposing.
     ortValue.Disown();
 }
 }
 catch(Exception e)
@@ -612,11 +609,11 @@ private IntPtr[] GetOrtValuesHandles(IReadOnlyCollection<NamedOnnxValue> values,
 var input = values.ElementAt(inputIndex);
 MemoryHandle? memHandle;
 var ortValue = input.ToOrtValue(out memHandle);
-                cleanupList.Add(ortValue);
 if (memHandle.HasValue)
 {
     cleanupList.Add(memHandle);
 }
+                cleanupList.Add(ortValue);
 result[inputIndex] = ortValue.Handle;
 }

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs
index b10887dad5cc0..112b068d241cf 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs
@@ -37,8 +37,6 @@ internal OrtValue(IntPtr handle, bool owned = true)
 internal IntPtr Handle { get; private set; }

-        internal bool IsOwned { get; private set; }
-
 #region NamedOnnxValue/DisposableOnnxValue accommodations

 // DisposableOnnxValue class owns Native handle to OrtValue
 // NamedOnnxValue does not own anything but creates a new one
 // which presents a fundamental semantic difference to ToOrtValue interface.
 //
 // We provide a way to relinquish ownership as well as return an instance of
 // OrtValue that is still disposable but does not have ownership
-
+        //
 ///
 /// This internal interface is used to transfer ownership elsewhere.
 /// This instance must still be disposed in case there are other native
@@ -62,6 +60,8 @@ internal IntPtr Disown()
     return handle;
 }

+        internal bool IsOwned { get; private set; }
+
 #endregion

From db70346de59e88e02b888211f0cd9f3129f6bf97 Mon Sep 17 00:00:00 2001
From: Dmitri Smirnov
Date: Wed, 5 Aug 2020 14:16:05 -0700
Subject: [PATCH 36/39] Fix encoding conversion and OrtIoBinding.GetOutputNames()

---
 .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 2 +-
 .../NativeOnnxValueHelper.cs | 14 ++++-----
 .../Microsoft.ML.OnnxRuntime/OrtIoBinding.cs | 20 ++++++++++---
 .../src/Microsoft.ML.OnnxRuntime/OrtValue.cs | 30 +++++++++++--------
 .../InferenceTest.cs | 14 ++++++++-
 5 files changed, 53 insertions(+), 27 deletions(-)

diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
index 95a4d03d9eb09..5ece3d856497b 100644
--- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
+++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs
@@ -712,7 +712,7 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca
 /// a continuous buffer that contains all output names.
 /// Names are not zero terminated; use lengths to extract strings. This needs to be deallocated.
 /// A buffer that contains lengths (size_t) for each of the returned strings in order.
- /// You may use Marshal.ReadIntPtr() to read each element of the array. The buffer must be deallocated.
+ /// The buffer must be deallocated. /// this contains the count of names returned which is the number of elements in lengths. /// public delegate IntPtr /* OrtStatus*/ DOrtGetBoundOutputNames(IntPtr /* (const OrtIoBinding*) */ io_binding, IntPtr /* OrtAllocator* */ allocator, diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs index c7ba3b3d5290f..29d6938e4ae2e 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeOnnxValueHelper.cs @@ -56,7 +56,10 @@ internal static class NativeOnnxValueHelper /// UTF-8 encoded equivalent internal static byte[] StringToZeroTerminatedUtf8(string s) { - return UTF8Encoding.UTF8.GetBytes(s + Char.MinValue); + byte[] utf8Bytes = UTF8Encoding.UTF8.GetBytes(s); + Array.Resize(ref utf8Bytes, utf8Bytes.Length + 1); + utf8Bytes[utf8Bytes.Length - 1] = 0; + return utf8Bytes; } /// @@ -65,18 +68,13 @@ internal static byte[] StringToZeroTerminatedUtf8(string s) /// /// pointer to native or pinned memory where Utf-8 resides /// - internal static string StringFromNativeUtf8(IntPtr nativeUtf8) + internal static string StringFromNativeUtf8(IntPtr nativeUtf8) { // .NET 5.0 has Marshal.PtrToStringUTF8 that does the below int len = 0; while (Marshal.ReadByte(nativeUtf8, len) != 0) ++len; - return StringFromNativeUtf8(nativeUtf8, 0, len); - } - - internal static string StringFromNativeUtf8(IntPtr nativeUtf8, int offset, int len) - { byte[] buffer = new byte[len]; - Marshal.Copy(nativeUtf8, buffer, 0, buffer.Length); + Marshal.Copy(nativeUtf8, buffer, 0, len); return Encoding.UTF8.GetString(buffer, 0, buffer.Length); } } diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs index 940057434f7a8..364a5ea21c5b2 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtIoBinding.cs @@ -3,6 +3,7 @@ using System; using System.Runtime.InteropServices; +using System.Text; namespace Microsoft.ML.OnnxRuntime { @@ -158,14 +159,25 @@ public string[] GetOutputNames() using (var bufferAllocation = new OrtMemoryAllocation(allocator, buffer, 0)) using (var lengthsAllocation = new OrtMemoryAllocation(allocator, lengths, 0)) { - uint outputCount = (uint)count; + int outputCount = (int)count; + var lens = new int[outputCount]; + int totalLength = 0; + for(int i = 0; i < outputCount; ++i) + { + var len =(int)Marshal.ReadIntPtr(lengths, IntPtr.Size * i); + lens[i] = len; + totalLength += len; + } + + var stringData = new byte[totalLength]; + Marshal.Copy(buffer, stringData, 0, stringData.Length); + string[] result = new string[outputCount]; int readOffset = 0; for(int i = 0; i < outputCount; ++i) { - // strLen in bytes - int strLen = (int)Marshal.ReadIntPtr(lengths, IntPtr.Size * i); - result[i] = NativeOnnxValueHelper.StringFromNativeUtf8(buffer, readOffset, strLen); + var strLen = lens[i]; + result[i] = Encoding.UTF8.GetString(stringData, readOffset, strLen); readOffset += strLen; } return result; diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs index 112b068d241cf..5c097eccd7edf 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/OrtValue.cs @@ -155,52 +155,52 @@ public static OrtValue CreateFromTensorObject(Object value, out MemoryHandle? 
me switch (elType) { case TensorElementType.Float: - PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + PinAsTensor(value as Tensor, typeSize, out memHandle, out dataBufferLength, out shape, out rank); break; case TensorElementType.Double: - PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + PinAsTensor(value as Tensor, typeSize, out memHandle, out dataBufferLength, out shape, out rank); break; case TensorElementType.Int32: - PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + PinAsTensor(value as Tensor, typeSize, out memHandle, out dataBufferLength, out shape, out rank); break; case TensorElementType.UInt32: - PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + PinAsTensor(value as Tensor, typeSize, out memHandle, out dataBufferLength, out shape, out rank); break; case TensorElementType.Int64: - PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + PinAsTensor(value as Tensor, typeSize, out memHandle, out dataBufferLength, out shape, out rank); break; case TensorElementType.UInt64: - PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + PinAsTensor(value as Tensor, typeSize, out memHandle, out dataBufferLength, out shape, out rank); break; case TensorElementType.Int16: - PinAsTensor(value as Tensor, elType, typeSize, out memHandle, out dataBufferLength, + PinAsTensor(value as Tensor, typeSize, out memHandle, out dataBufferLength, out shape, out rank); break; case TensorElementType.UInt16: - PinAsTensor(value as Tensor, elType, typeSize, + PinAsTensor(value as Tensor, typeSize, out memHandle, out dataBufferLength, out shape, out rank); break; case TensorElementType.UInt8: - PinAsTensor(value as Tensor, elType, typeSize, + PinAsTensor(value as Tensor, typeSize, out memHandle, out dataBufferLength, out shape, out rank); break; case TensorElementType.Int8: - PinAsTensor(value as Tensor, elType, typeSize, + PinAsTensor(value as Tensor, typeSize, out memHandle, out dataBufferLength, out shape, out rank); break; case TensorElementType.Bool: - PinAsTensor(value as Tensor, elType, typeSize, + PinAsTensor(value as Tensor, typeSize, out memHandle, out dataBufferLength, out shape, out rank); break; @@ -242,7 +242,6 @@ public static OrtValue CreateFromTensorObject(Object value, out MemoryHandle? me private static void PinAsTensor( Tensor tensor, - TensorElementType nativeElementType, int elementSize, out MemoryHandle? pinnedHandle, out int dataBufferLength, @@ -282,6 +281,11 @@ private static void PinAsTensor( private static OrtValue CreateStringTensor(Tensor tensor) { + if (tensor == null) + { + throw new OnnxRuntimeException(ErrorCode.Fail, "Cast to Tensor failed. 
BUG check!"); + } + int totalLength = 0; for (int i = 0; i < tensor.Length; i++) { @@ -317,8 +321,8 @@ out valueHandle { var utf8str = NativeOnnxValueHelper.StringToZeroTerminatedUtf8(tensor.GetValue(i)); var gcHandle = GCHandle.Alloc(utf8str, GCHandleType.Pinned); - pinnedHandles.Add(new PinnedGCHandle(gcHandle)); nativeStrings[i] = gcHandle.AddrOfPinnedObject(); + pinnedHandles.Add(new PinnedGCHandle(gcHandle)); } using (var pinnedStrings = new PinnedGCHandle(GCHandle.Alloc(nativeStrings, GCHandleType.Pinned))) diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index 215432395812b..d8a81af1fb334 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -1799,7 +1799,6 @@ private void TestBinding() var outputMeta = session.OutputMetadata; var outputTensor = new DenseTensor(outputData, outputMeta[outputName].Dimensions); - // var inputs = new List { NamedOnnxValue.CreateFromTensor("data_0", inputTensor) }; var ioBinding = session.CreateIoBinding(); dispList.Add(ioBinding); @@ -1810,6 +1809,19 @@ private void TestBinding() var ortAllocationOutput = allocator.Allocate((uint)outputData.Length * sizeof(float)); dispList.Add(ortAllocationOutput); + // Test GetOutputNames, bind two output names + { + var cyrName = "несуществующийВыход"; + var longShape = Array.ConvertAll(outputMeta[outputName].Dimensions, i => i); + ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput); + ioBinding.BindOutput(cyrName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput); + string[] outputs = ioBinding.GetOutputNames(); + Assert.Equal(2, outputs.Length); + Assert.Equal(outputName, outputs[0]); + Assert.Equal(cyrName, outputs[1]); + ioBinding.ClearBoundOutputs(); + } + // Test 1. Bind input to fixed, Bind Output to Fixed. using (FixedBufferOnnxValue fixeInputBuffer = FixedBufferOnnxValue.CreateFromTensor(inputTensor), fixedOutputBuffer = FixedBufferOnnxValue.CreateFromTensor(outputTensor)) From 0d1fbc881b88905b8e7239f755c63fc5fec2bacc Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Wed, 5 Aug 2020 16:09:46 -0700 Subject: [PATCH 37/39] Fix Run() bugs. 
--- .../InferenceSession.cs | 76 +++++++------------ .../InferenceTest.cs | 38 ---------- 2 files changed, 26 insertions(+), 88 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs index 2ae5d87c72a39..3fa97a0c18b9a 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/InferenceSession.cs @@ -193,17 +193,11 @@ public IDisposableReadOnlyCollection Run( using (var cleanupList = new DisposableList()) { var inputNamesArray = ConvertNamesToUtf8(inputNames, n => n, cleanupList); + IntPtr[] inputValuesArray = GetOrtValuesHandles(inputValues, true); var outputNamesArray = ConvertNamesToUtf8(outputNames, n => n, cleanupList); - int inputIndex = 0; - IntPtr[] inputValuesArray = new IntPtr[inputValues.Count]; - foreach (var input in inputValues) - { - inputValuesArray[inputIndex] = input.Value.Handle; - inputIndex++; - } - var ortValues = RunImpl(options, inputNamesArray, inputNamesArray, outputNamesArray, cleanupList); + var ortValues = RunImpl(options, inputNamesArray, inputValuesArray, outputNamesArray, cleanupList); return CreateDisposableResult(ortValues, outputNames); } } @@ -256,29 +250,11 @@ public void Run( { // prepare inputs var inputNamesArray = ConvertNamesToUtf8(inputNames, n => n, cleanupList); - var outputNamesArray = ConvertNamesToUtf8(outputNames, n => n, cleanupList); - - IntPtr[] inputValuesArray = new IntPtr[inputNames.Count]; - int inputIndex = 0; - foreach (var input in inputValues) - { - inputValuesArray[inputIndex] = input.Value.Handle; - inputIndex++; - } + IntPtr[] inputValuesArray = GetOrtValuesHandles(inputValues, true); // prepare outputs - IntPtr[] outputValuesArray = new IntPtr[outputNames.Count]; - int outputIndex = 0; - foreach (var output in outputValues) - { - if (output.ElementType == Tensors.TensorElementType.String) - { - throw new NotSupportedException("Using string type FixedBufferOnnxValue in outputs is not supported."); - } - - outputValuesArray[outputIndex] = output.Value.Handle; - outputIndex++; - } + var outputNamesArray = ConvertNamesToUtf8(outputNames, n => n, cleanupList); + IntPtr[] outputValuesArray = GetOrtValuesHandles(outputValues, false); NativeApiStatus.VerifySuccess(NativeMethods.OrtRun( _nativeHandle, @@ -325,8 +301,9 @@ public void Run( using(var cleanupList = new DisposableList()) { var inputNamesArray = ConvertNamesToUtf8(inputs, i => i.Name, cleanupList); - var outputNamesArray = ConvertNamesToUtf8(outputs, o => o.Name, cleanupList); var inputValuesArray = GetOrtValuesHandles(inputs, cleanupList); + + var outputNamesArray = ConvertNamesToUtf8(outputs, o => o.Name, cleanupList); var outputValuesArray = GetOrtValuesHandles(outputs, cleanupList); NativeApiStatus.VerifySuccess(NativeMethods.OrtRun( @@ -386,17 +363,7 @@ public void Run( // prepare outputs var outputNamesArray = ConvertNamesToUtf8(outputNames, n => n, cleanupList); - IntPtr[] outputValuesArray = new IntPtr[outputValues.Count]; - int outputIndex = 0; - foreach (var output in outputValues) - { - if (output.ElementType == TensorElementType.String) - { - throw new NotSupportedException("Using string type FixedBufferOnnxValue in outputs is not supported."); - } - outputValuesArray[outputIndex] = output.Value.Handle; - outputIndex++; - } + var outputValuesArray = GetOrtValuesHandles(outputValues, false); NativeApiStatus.VerifySuccess(NativeMethods.OrtRun( _nativeHandle, @@ -453,13 +420,7 @@ public void Run( { // prepare inputs var 
inputNamesArray = ConvertNamesToUtf8(inputNames, n => n, cleanupList); - IntPtr[] inputValuesArray = new IntPtr[inputValues.Count]; - int inputIndex = 0; - foreach (var input in inputValues) - { - inputValuesArray[inputIndex] = input.Value.Handle; - inputIndex++; - } + var inputValuesArray = GetOrtValuesHandles(inputValues, true); // prepare outputs var outputNamesArray = ConvertNamesToUtf8(outputs, o => o.Name, cleanupList); @@ -615,12 +576,27 @@ private IntPtr[] GetOrtValuesHandles(IReadOnlyCollection values, } cleanupList.Add(ortValue); result[inputIndex] = ortValue.Handle; - inputIndex++; } return result; } - private DisposableList RunImpl(RunOptions options, IntPtr[] inputNames, IntPtr[] inputValues, IntPtr[] outputNames, + private IntPtr[] GetOrtValuesHandles(IReadOnlyCollection values, bool input) + { + var valuesArray = new IntPtr[values.Count]; + for (int index = 0; index < values.Count; ++index) + { + var v = values.ElementAt(index); + if (!input && v.ElementType == Tensors.TensorElementType.String) + { + throw new NotSupportedException("Using string type FixedBufferOnnxValue in outputs is not supported."); + } + valuesArray[index] = v.Value.Handle; + } + return valuesArray; + } + + + private DisposableList RunImpl(RunOptions options, IntPtr[] inputNames, IntPtr[] inputValues, IntPtr[] outputNames, DisposableList cleanupList) { var ortValues = new DisposableList(outputNames.Length); diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index d8a81af1fb334..a6a56fbee1af1 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -1036,44 +1036,6 @@ private void TestReusingRunOutputStringType() } } - [Fact] - private void TestReusingDisposedRunOutput() - { - // model takes 1x5 input of fixed type, echoes back - string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "test_types_BOOL.pb"); - using (var session = new InferenceSession(modelPath)) - { - var container = new List(); - var tensorIn = new DenseTensor(new bool[] { true, false, true, false, true }, new int[] { 1, 5 }); - var nov = NamedOnnxValue.CreateFromTensor("input", tensorIn); - container.Add(nov); - var res1 = session.Run(container); - - // Dispose the result tensor - res1.First().Dispose(); - - bool succeeded = false; - - // Now try using the disposed output as input to another Run() - try - { - // Run() should fail with a user friendly error message. - session.Run(res1); - } - - catch (ObjectDisposedException e) - { - var errorString = "This instance of DisposableNamedOnnxValue has already been disposed"; - - Assert.Contains(errorString, e.Message); - - succeeded = true; - } - - Assert.True(succeeded); - } - } - [Fact] public void TestCreateFixedBufferOnnxValueFromStringTensor() { From 4710ee8e8ba7846e9f021313953c067d47069971 Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Thu, 6 Aug 2020 12:09:50 -0700 Subject: [PATCH 38/39] Move some unsafe test code to the main project so it can be tested there. PutIDisposableTest back to the test code. 
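A note on the Run() fixes in the preceding patch: the first overload had been passing inputNamesArray twice to RunImpl, and the per-overload handle-collection loops are now consolidated in GetOrtValuesHandles, which also rejects string-typed FixedBufferOnnxValue outputs up front. A hedged usage sketch of the overload this repairs ("model.onnx" and the tensor names are placeholders for a real model):

    using Microsoft.ML.OnnxRuntime;
    using Microsoft.ML.OnnxRuntime.Tensors;

    // Hedged sketch: pre-pinned input and output buffers routed through the
    // four-argument Run overload. A string-typed output buffer would throw
    // NotSupportedException in GetOrtValuesHandles.
    using (var session = new InferenceSession("model.onnx"))
    {
        var inputTensor = new DenseTensor<float>(new[] { 1, 3, 224, 224 });
        var outputTensor = new DenseTensor<float>(new[] { 1, 1000, 1, 1 });
        using (var input = FixedBufferOnnxValue.CreateFromTensor(inputTensor))
        using (var output = FixedBufferOnnxValue.CreateFromTensor(outputTensor))
        {
            session.Run(new[] { "data_0" }, new[] { input },
                        new[] { "softmaxout_1" }, new[] { output });
        }
    }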
--- .../FixedBufferOnnxValue.cs | 1 - .../InferenceTest.cs | 139 +++++++----------- .../OrtIoBindingAllocationTest.cs | 100 +++++++++++++ 3 files changed, 157 insertions(+), 83 deletions(-) create mode 100644 csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs index 41caec3ab6846..8ef1fc119f8f2 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/FixedBufferOnnxValue.cs @@ -1,7 +1,6 @@ using Microsoft.ML.OnnxRuntime.Tensors; using System; using System.Buffers; -using System.Diagnostics; namespace Microsoft.ML.OnnxRuntime { diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs index a6a56fbee1af1..b45a9951e1564 100644 --- a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/InferenceTest.cs @@ -11,6 +11,9 @@ using Xunit; using Xunit.Abstractions; +// This runs in a separate package built from EndToEndTests +// and for this reason it can not refer to non-public members +// of Onnxruntime package namespace Microsoft.ML.OnnxRuntime.Tests { public class InferenceTest @@ -222,7 +225,7 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev } // Run inference with pinned inputs and outputs created with in Run() - using (var pinnedInputs = new DisposableList()) + using (var pinnedInputs = new DisposableListTest()) { var inputNames = container.Select(i => i.Name).ToArray(); pinnedInputs.AddRange(container.Select(i => FixedBufferOnnxValue.CreateFromTensor(i.AsTensor()))); @@ -252,7 +255,7 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev } // Run inference with pinned inputs and named outputs - using (var pinnedInputs = new DisposableList()) + using (var pinnedInputs = new DisposableListTest()) { var inputNames = container.Select(i => i.Name).ToArray(); pinnedInputs.AddRange(container.Select(i => FixedBufferOnnxValue.CreateFromTensor(i.AsTensor()))); @@ -269,7 +272,7 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev // Run inference with named inputs and pinned outputs { // correct pre-allocated outputs - using (var pinnedOutputs = new DisposableList()) + using (var pinnedOutputs = new DisposableListTest()) { var outputTensor = new DenseTensor(expectedOutputDimensions); pinnedOutputs.Add(FixedBufferOnnxValue.CreateFromTensor(outputTensor)); @@ -279,8 +282,8 @@ private void CanRunInferenceOnAModel(GraphOptimizationLevel graphOptimizationLev } // Run inference with pinned inputs and pinned outputs - using (DisposableList pinnedInputs = new DisposableList(), - pinnedOutputs = new DisposableList()) + using (DisposableListTest pinnedInputs = new DisposableListTest(), + pinnedOutputs = new DisposableListTest()) { var inputNames = container.Select(i => i.Name).ToArray(); pinnedInputs.AddRange(container.Select(i => FixedBufferOnnxValue.CreateFromTensor(i.AsTensor()))); @@ -425,7 +428,7 @@ private void ThrowInconsistentPinnedInputs() var inputData = tuple.Item2; var tensor = tuple.Item3; - using (var inputs = new DisposableList()) + using (var inputs = new DisposableListTest()) { inputs.Add(FixedBufferOnnxValue.CreateFromTensor(tensor)); var ex = Assert.Throws(() => session.Run(new string[0], inputs)); @@ -505,7 +508,7 @@ private void 
ThrowInconsistentPinnedOutputs() var inputs = new List { NamedOnnxValue.CreateFromTensor("data_0", inputTensor) }; var outputTensor = new DenseTensor((ReadOnlySpan)new[] { 1, 1000, 1, 1 }); - using (var outputs = new DisposableList()) + using (var outputs = new DisposableListTest()) { var ex = Assert.Throws(() => session.Run(inputs, new string[] { "softmaxout_1" }, outputs)); Assert.StartsWith("Length of outputNames (1) must match that of outputValues (0).", ex.Message); @@ -1624,7 +1627,6 @@ void TestCPUAllocatorInternal(InferenceSession session) int device_id = 0; using (var info_cpu = new OrtMemoryInfo(OrtMemoryInfo.allocatorCPU, OrtAllocatorType.ArenaAllocator, device_id, OrtMemType.Default)) { - Assert.NotEqual(info_cpu.Pointer, IntPtr.Zero); Assert.Equal("Cpu", info_cpu.Name); Assert.Equal(device_id, info_cpu.Id); Assert.Equal(OrtAllocatorType.ArenaAllocator, info_cpu.GetAllocatorType()); @@ -1632,7 +1634,6 @@ void TestCPUAllocatorInternal(InferenceSession session) using (var allocator = new OrtAllocator(session, info_cpu)) { - Assert.NotEqual(allocator.Pointer, IntPtr.Zero); var alloc_info = allocator.Info; // Allocator type returned may be different on x86 so we don't compare. Assert.Equal(info_cpu.Name, alloc_info.Name); @@ -1641,7 +1642,6 @@ void TestCPUAllocatorInternal(InferenceSession session) uint size = 1024; OrtMemoryAllocation chunk = allocator.Allocate(size); - Assert.NotEqual(chunk.Pointer, IntPtr.Zero); Assert.Equal(chunk.Size, size); var chunk_info = chunk.Info; // Allocator type returned may be different on x86 so we don't compare. @@ -1660,7 +1660,6 @@ void TestCUDAAllocatorInternal(InferenceSession session) int device_id = 0; using (var info_cuda = new OrtMemoryInfo(OrtMemoryInfo.allocatorCUDA, OrtAllocatorType.ArenaAllocator, device_id, OrtMemType.Default)) { - Assert.NotEqual(info_cuda.Pointer, IntPtr.Zero); Assert.Equal("Cuda", info_cuda.Name); Assert.Equal(device_id, info_cuda.Id); Assert.Equal(OrtAllocatorType.ArenaAllocator, info_cuda.GetAllocatorType()); @@ -1668,13 +1667,11 @@ void TestCUDAAllocatorInternal(InferenceSession session) using (var allocator = new OrtAllocator(session, info_cuda)) { - Assert.NotEqual(allocator.Pointer, IntPtr.Zero); var alloc_info = allocator.Info; Assert.True(info_cuda.Equals(alloc_info)); uint size = 1024; OrtMemoryAllocation chunk = allocator.Allocate(size); - Assert.NotEqual(chunk.Pointer, IntPtr.Zero); Assert.Equal(chunk.Size, size); Assert.True(chunk.Info.Equals(alloc_info)); chunk.Dispose(); @@ -1704,53 +1701,14 @@ private void TestAllocator() } } - /// - /// This works only for allocations accessible from host memory - /// - /// - /// - private static void PopulateNativeBufferFloat(OrtMemoryAllocation buffer, float[] elements) - { - if(buffer.Size < elements.Length * sizeof(float)) - { - Assert.True(false); - } - - unsafe - { - float* p = (float*)buffer.Pointer; - for (int i = 0; i < elements.Length; ++i) - { - *p++ = elements[i]; - } - } - } - - private static void CompareNativeBufferFloat(OrtMemoryAllocation buffer, float[] elements, IEqualityComparer comp) - { - if (buffer.Size != elements.Length * sizeof(float)) - { - Assert.True(false); - } - - unsafe - { - float* p = (float*)buffer.Pointer; - for (int i = 0; i < elements.Length; ++i) - { - Assert.True(comp.Equals(*p++, elements[i])); - } - } - } - [Fact] - private void TestBinding() + private void TestIOBinding() { var inputName = "data_0"; var outputName = "softmaxout_1"; var allocator = OrtAllocator.DefaultInstance; // From the model - using (var dispList = new 
DisposableList()) + using (var dispList = new DisposableListTest()) { var tuple = OpenSessionSqueezeNet(); var session = tuple.Item1; @@ -1764,10 +1722,6 @@ private void TestBinding() var ioBinding = session.CreateIoBinding(); dispList.Add(ioBinding); - var ortAllocationInput = allocator.Allocate((uint)inputData.Length * sizeof(float)); - dispList.Add(ortAllocationInput); - PopulateNativeBufferFloat(ortAllocationInput, inputData); - var ortAllocationOutput = allocator.Allocate((uint)outputData.Length * sizeof(float)); dispList.Add(ortAllocationOutput); @@ -1801,28 +1755,7 @@ private void TestBinding() } } - //Test 2. Use the same input as in Test 1 - // but rebind the output to OrtAllocated buffer - using (FixedBufferOnnxValue fixedInputBuffer = FixedBufferOnnxValue.CreateFromTensor(inputTensor)) - { - ioBinding.BindInput(inputName, fixedInputBuffer); - var longShape = Array.ConvertAll(outputMeta[outputName].Dimensions, i => i); - ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, longShape, ortAllocationOutput); - using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding)) - { - Assert.Equal(1, outputs.Count); - var output = outputs.First(); - Assert.Equal(outputName, output.Name); - var tensor = output.AsTensor(); - Assert.True(tensor.IsFixedSize); - Assert.Equal(outputData, tensor.ToArray(), new floatComparer()); - - // Let's check that the output buffer actually contains the data - CompareNativeBufferFloat(ortAllocationOutput, outputData, new floatComparer()); - } - } - - // Test 3. Bind input to preallocated buffer. Output to a device so the allocation would happen + // Test 2. Bind input to preallocated buffer. Output to a device so the allocation would happen // automatically using (FixedBufferOnnxValue fixedInputBuffer = FixedBufferOnnxValue.CreateFromTensor(inputTensor)) { @@ -2139,7 +2072,7 @@ static NamedOnnxValue CreateNamedOnnxValueFromRawData(string name, byte[] raw return NamedOnnxValue.CreateFromTensor(name, dt); } - static Tuple, float[]> OpenSessionSqueezeNet(int? cudaDeviceId = null) + internal static Tuple, float[]> OpenSessionSqueezeNet(int? cudaDeviceId = null) { string modelPath = Path.Combine(Directory.GetCurrentDirectory(), "squeezenet.onnx"); #if USE_CUDA @@ -2167,7 +2100,7 @@ static Tuple, float[]> OpenSession } } - class floatComparer : IEqualityComparer + internal class floatComparer : IEqualityComparer { private float atol = 1e-3f; private float rtol = 1.7e-2f; @@ -2218,6 +2151,48 @@ public SkipNonPackageTests() } } } + } + + // Copy of the class that is internal in the main package + internal class DisposableListTest : List, IDisposableReadOnlyCollection + where T : IDisposable + { + public DisposableListTest() { } + public DisposableListTest(int count) : base(count) { } + #region IDisposable Support + private bool disposedValue = false; // To detect redundant calls + + protected virtual void Dispose(bool disposing) + { + if (!disposedValue) + { + if (disposing) + { + // Dispose in the reverse order. + // Objects should typically be destroyed/disposed + // in the reverse order of its creation + // especially if the objects created later refer to the + // objects created earlier. For homogeneous collections of objects + // it would not matter. + for (int i = this.Count - 1; i >= 0; --i) + { + this[i]?.Dispose(); + } + this.Clear(); + } + + disposedValue = true; + } + } + + // This code added to correctly implement the disposable pattern. + public void Dispose() + { + // Do not change this code. 
Put cleanup code in Dispose(bool disposing) above. + Dispose(true); + GC.SuppressFinalize(this); + } + #endregion } } diff --git a/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs new file mode 100644 index 0000000000000..ac28829ad530e --- /dev/null +++ b/csharp/test/Microsoft.ML.OnnxRuntime.Tests/OrtIoBindingAllocationTest.cs @@ -0,0 +1,100 @@ +using Microsoft.ML.OnnxRuntime.Tensors; +using System; +using System.Collections.Generic; +using System.Linq; +using System.Text; +using Xunit; +using static Microsoft.ML.OnnxRuntime.Tests.InferenceTest; + +namespace Microsoft.ML.OnnxRuntime.Tests +{ + public class OrtIoBindingAllocationTest + { + /// + /// This works only for allocations accessible from host memory + /// + /// + /// + private static void PopulateNativeBufferFloat(OrtMemoryAllocation buffer, float[] elements) + { + if (buffer.Size < elements.Length * sizeof(float)) + { + Assert.True(false); + } + + unsafe + { + float* p = (float*)buffer.Pointer; + for (int i = 0; i < elements.Length; ++i) + { + *p++ = elements[i]; + } + } + } + + [Fact] + public void TestIOBindingWithOrtAllocation() + { + var inputName = "data_0"; + var outputName = "softmaxout_1"; + var allocator = OrtAllocator.DefaultInstance; + // From the model + using (var dispList = new DisposableListTest()) + { + var tuple = OpenSessionSqueezeNet(); + var session = tuple.Item1; + var inputData = tuple.Item2; + var inputTensor = tuple.Item3; + var outputData = tuple.Item4; + dispList.Add(session); + var inputMeta = session.InputMetadata; + var outputMeta = session.OutputMetadata; + var outputTensor = new DenseTensor(outputData, outputMeta[outputName].Dimensions); + + var ioBinding = session.CreateIoBinding(); + dispList.Add(ioBinding); + + var ortAllocationInput = allocator.Allocate((uint)inputData.Length * sizeof(float)); + dispList.Add(ortAllocationInput); + var inputShape = Array.ConvertAll(inputMeta[inputName].Dimensions, d => d); + PopulateNativeBufferFloat(ortAllocationInput, inputData); + + var ortAllocationOutput = allocator.Allocate((uint)outputData.Length * sizeof(float)); + dispList.Add(ortAllocationOutput); + + var outputShape = Array.ConvertAll(outputMeta[outputName].Dimensions, i => i); + + // Test 1. Bind the output to OrtAllocated buffer + using (FixedBufferOnnxValue fixedInputBuffer = FixedBufferOnnxValue.CreateFromTensor(inputTensor)) + { + ioBinding.BindInput(inputName, fixedInputBuffer); + ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, outputShape, ortAllocationOutput); + using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding)) + { + Assert.Equal(1, outputs.Count); + var output = outputs.ElementAt(0); + Assert.Equal(outputName, output.Name); + var tensor = output.AsTensor(); + Assert.True(tensor.IsFixedSize); + Assert.Equal(outputData, tensor.ToArray(), new floatComparer()); + } + } + + // Test 2. 
Bind the input to memory allocation and output to a fixedBuffer + { + ioBinding.BindInput(inputName, Tensors.TensorElementType.Float, inputShape, ortAllocationInput); + ioBinding.BindOutput(outputName, Tensors.TensorElementType.Float, outputShape, ortAllocationOutput); + using (var outputs = session.RunWithBindingAndNames(new RunOptions(), ioBinding)) + { + Assert.Equal(1, outputs.Count); + var output = outputs.ElementAt(0); + Assert.Equal(outputName, output.Name); + var tensor = output.AsTensor(); + Assert.True(tensor.IsFixedSize); + Assert.Equal(outputData, tensor.ToArray(), new floatComparer()); + } + } + } + } + } +} From facd52a12d0b51b79bcb9d4085eea47f9b36d39d Mon Sep 17 00:00:00 2001 From: Dmitri Smirnov Date: Mon, 10 Aug 2020 11:13:48 -0700 Subject: [PATCH 39/39] Fix up merge leftovers --- .../DisposableNamedOnnxValue.cs | 26 +++--- .../Microsoft.ML.OnnxRuntime/NativeMethods.cs | 6 -- .../core/session/onnxruntime_c_api.h | 90 +++++++++---------- onnxruntime/core/session/onnxruntime_c_api.cc | 19 ++-- 4 files changed, 65 insertions(+), 76 deletions(-) diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs index a5232f91c1ac3..859e543e7bce5 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/DisposableNamedOnnxValue.cs @@ -21,28 +21,22 @@ public DisposableList() { } public DisposableList(int count) : base(count) { } #region IDisposable Support - private bool disposedValue = false; // To detect redundant calls protected virtual void Dispose(bool disposing) { - if (!disposedValue) + if (disposing) { - if (disposing) + // Dispose in the reverse order. + // Objects should typically be destroyed/disposed + // in the reverse order of its creation + // especially if the objects created later refer to the + // objects created earlier. For homogeneous collections of objects + // it would not matter. + for (int i = this.Count - 1; i >= 0; --i) { - // Dispose in the reverse order. - // Objects should typically be destroyed/disposed - // in the reverse order of its creation - // especially if the objects created later refer to the - // objects created earlier. For homogeneous collections of objects - // it would not matter. - for (int i = this.Count - 1; i >= 0; --i) - { - this[i]?.Dispose(); - } - this.Clear(); + this[i]?.Dispose(); } - - disposedValue = true; + this.Clear(); } } diff --git a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs index 76bdee0cdd395..dbd0d80c53b5f 100644 --- a/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs +++ b/csharp/src/Microsoft.ML.OnnxRuntime/NativeMethods.cs @@ -491,12 +491,6 @@ IntPtr[] outputValues /* An array of output value pointers. Array must be alloca public delegate IntPtr /*(OrtStatus*)*/ DOrtDisablePrePacking(IntPtr /* OrtSessionOptions* */ options); public static DOrtDisablePrePacking OrtDisablePrePacking; - public delegate IntPtr /*(OrtStatus*)*/ DOrtEnablePrePacking(IntPtr /* OrtSessionOptions* */ options); - public static DOrtEnablePrePacking OrtEnablePrePacking; - - public delegate IntPtr /*(OrtStatus*)*/ DOrtDisablePrePacking(IntPtr /* OrtSessionOptions* */ options); - public static DOrtDisablePrePacking OrtDisablePrePacking; - ///** // * The order of invocation indicates the preference order as well. In other words call this method // * on your most preferred execution provider first followed by the less preferred ones. 
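A note on the NativeMethods.cs cleanup above: each native entry point is declared exactly once as a delegate type plus a static field, so the duplicated DOrtEnablePrePacking/DOrtDisablePrePacking declarations being removed were a merge leftover that would not compile. A hedged sketch of how such a field is typically wired from a native function pointer (the delegate and names below are placeholders, not the exact ORT binding code):

    using System;
    using System.Runtime.InteropServices;

    // Illustrative: convert a function pointer taken from the native OrtApi
    // table into the managed delegate once at startup.
    internal delegate IntPtr DOrtDisablePrePackingSketch(IntPtr options);

    internal static class BindingSketch
    {
        internal static DOrtDisablePrePackingSketch Bind(IntPtr nativeFnPtr)
        {
            return (DOrtDisablePrePackingSketch)Marshal.GetDelegateForFunctionPointer(
                nativeFnPtr, typeof(DOrtDisablePrePackingSketch));
        }
    }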
diff --git a/include/onnxruntime/core/session/onnxruntime_c_api.h b/include/onnxruntime/core/session/onnxruntime_c_api.h index 66ad1d9f53015..14b4f2c5079e7 100644 --- a/include/onnxruntime/core/session/onnxruntime_c_api.h +++ b/include/onnxruntime/core/session/onnxruntime_c_api.h @@ -819,6 +819,51 @@ struct OrtApi { _Inout_ OrtSessionOptions* options, _In_ const char* dim_name, _In_ int64_t dim_value); + /** + * \param out_ptr will hold a pointer to the array of char * + * representing available providers. + * \param provider_length is a pointer to an int variable where + * the number of available providers will be added. + * The caller is responsible for freeing each char * and the pointer + * array by calling ReleaseAvailableProviders(). + */ + ORT_API2_STATUS(GetAvailableProviders, _Outptr_ char*** out_ptr, + _In_ int* provider_length); + + /** + * \param ptr is the pointer to an array of available providers you + * get after calling GetAvailableProviders(). + * \param providers_length is the number of available providers. + */ + ORT_API2_STATUS(ReleaseAvailableProviders, _In_ char** ptr, + _In_ int providers_length); + + /** + * \param value - A tensor created from OrtCreateTensor... function. + * \param index - index of string tensor element, length of element at index will be returned. + * \param out - number of UTF-8 bytes that the string contains + */ + ORT_API2_STATUS(GetStringTensorElementLength, _In_ const OrtValue* value, size_t index, _Out_ size_t* out); + + /** + * \param s string element contents in UTF-8 encoding. The string is NOT null-terminated. + * \param value A tensor created from OrtCreateTensor... function. + * \param s_len element length, get it from OrtGetStringTensorElementLength. + * \param index offset of element of tensor to return. + */ + ORT_API2_STATUS(GetStringTensorElement, _In_ const OrtValue* value, size_t s_len, size_t index, _Out_writes_bytes_all_(s_len) void* s); + + /** + * \param value - A tensor created from OrtCreateTensor... function. + * \param s - A null terminated UTF-8 encoded string. + * \param index - index of string tensor element to fill + */ + ORT_API2_STATUS(FillStringTensorElement, _Inout_ OrtValue* value, _In_ const char* s, size_t index); + + // Control pre-packing of initialized constant tensors + ORT_API2_STATUS(EnablePrePacking, _Inout_ OrtSessionOptions* options); + ORT_API2_STATUS(DisablePrePacking, _Inout_ OrtSessionOptions* options); + /** * \param sess valid OrtSession instance * \para mem_info - valid OrtMemoryInfo instance @@ -914,51 +959,6 @@ struct OrtApi { */ void(ORT_API_CALL* ClearBoundInputs)(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION ORT_ALL_ARGS_NONNULL; void(ORT_API_CALL* ClearBoundOutputs)(_Inout_ OrtIoBinding* binding_ptr) NO_EXCEPTION ORT_ALL_ARGS_NONNULL; - - /** - * \param out_ptr will hold a pointer to the array of char * - * representing available providers. - * \param provider_length is a pointer to an int variable where - * the number of available providers will be added. - * The caller is responsible for freeing each char * and the pointer - * array by calling ReleaseAvailableProviders(). - */ - ORT_API2_STATUS(GetAvailableProviders, _Outptr_ char*** out_ptr, - _In_ int* provider_length); - - /** - * \param ptr is the pointer to an array of available providers you - * get after calling GetAvailableProviders(). - * \param providers_length is the number of available providers. 
- */ - ORT_API2_STATUS(ReleaseAvailableProviders, _In_ char** ptr, - _In_ int providers_length); - - /** - * \param value - A tensor created from OrtCreateTensor... function. - * \param index - index of string tensor element, length of element at index will be returned. - * \param out - number of UTF-8 bytes that the string contains - */ - ORT_API2_STATUS(GetStringTensorElementLength, _In_ const OrtValue* value, size_t index, _Out_ size_t* out); - - /** - * \param s string element contents in UTF-8 encoding. The string is NOT null-terminated. - * \param value A tensor created from OrtCreateTensor... function. - * \param s_len element length, get it from OrtGetStringTensorElementLength. - * \param index offset of element of tensor to return. - */ - ORT_API2_STATUS(GetStringTensorElement, _In_ const OrtValue* value, size_t s_len, size_t index, _Out_writes_bytes_all_(s_len) void* s); - - /** - * \param value - A tensor created from OrtCreateTensor... function. - * \param s - A null terminated UTF-8 encoded string. - * \param index - index of string tensor element to fill - */ - ORT_API2_STATUS(FillStringTensorElement, _Inout_ OrtValue* value, _In_ const char* s, size_t index); - - // Control pre-packing of initialized constant tensors - ORT_API2_STATUS(EnablePrePacking, _Inout_ OrtSessionOptions* options); - ORT_API2_STATUS(DisablePrePacking, _Inout_ OrtSessionOptions* options); }; /* diff --git a/onnxruntime/core/session/onnxruntime_c_api.cc b/onnxruntime/core/session/onnxruntime_c_api.cc index 39664b07f1933..f8de3d0512e43 100644 --- a/onnxruntime/core/session/onnxruntime_c_api.cc +++ b/onnxruntime/core/session/onnxruntime_c_api.cc @@ -1825,7 +1825,16 @@ static constexpr OrtApi ort_api_1_to_4 = { // Version 4 - In development - // Allocator extension and Binding APIs are exposed via C# API, do not move + &OrtApis::GetAvailableProviders, + &OrtApis::ReleaseAvailableProviders, + &OrtApis::GetStringTensorElementLength, + &OrtApis::GetStringTensorElement, + &OrtApis::FillStringTensorElement, + &OrtApis::EnablePrePacking, + &OrtApis::DisablePrePacking, + + // IoBinding and above are propagated in the same order to C# API + // Do not move &OrtApis::CreateAllocator, &OrtApis::ReleaseAllocator, &OrtApis::RunWithBinding, @@ -1839,14 +1848,6 @@ static constexpr OrtApi ort_api_1_to_4 = { &OrtApis::ClearBoundInputs, &OrtApis::ClearBoundOutputs, - // feel free to add/remove/rearrange here - &OrtApis::GetAvailableProviders, - &OrtApis::ReleaseAvailableProviders, - &OrtApis::GetStringTensorElementLength, - &OrtApis::GetStringTensorElement, - &OrtApis::FillStringTensorElement, - &OrtApis::EnablePrePacking, - &OrtApis::DisablePrePacking, }; // Assert to do a limited check to ensure Version 1 of OrtApi never changes (will detect an addition or deletion but not if they cancel out each other)
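A closing note on the table reordering above: rearranging ort_api_1_to_4 is only safe while version 4 is still unreleased, because API slots bind by position, not by name; the "IoBinding and above are propagated in the same order to C# API / Do not move" comment exists for the same reason. A hedged sketch of why position matters on the managed side (the struct below is illustrative, not the actual binding code):

    using System;
    using System.Runtime.InteropServices;

    // Illustrative: a sequential struct of IntPtr slots mirroring part of the
    // native OrtApi table. Entries bind purely by offset, so reordering the
    // native table after release would silently misbind every slot from the
    // change onward. Only the version-4 additions are shown.
    [StructLayout(LayoutKind.Sequential)]
    struct OrtApiV4TailMirror
    {
        public IntPtr GetAvailableProviders;
        public IntPtr ReleaseAvailableProviders;
        public IntPtr GetStringTensorElementLength;
        public IntPtr GetStringTensorElement;
        public IntPtr FillStringTensorElement;
        public IntPtr EnablePrePacking;
        public IntPtr DisablePrePacking;
        public IntPtr CreateAllocator;   // IoBinding block: order is load-bearing
        public IntPtr ReleaseAllocator;
    }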