From 57a4efa10138f5a6f168b1f411de0368d1351adb Mon Sep 17 00:00:00 2001
From: Oleg Pipikin
Date: Thu, 19 Sep 2024 16:32:44 +0000
Subject: [PATCH] Enable mmap for reading model from cache

---
 .../dev_api/openvino/runtime/shared_buffer.hpp | 15 +++++++++++++++
 src/inference/src/cache_manager.hpp            | 17 +++++++++++++----
 src/inference/src/dev/core_impl.cpp            | 13 +++++++------
 src/inference/src/dev/core_impl.hpp            |  3 ++-
 4 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp
index c4420b9e3fa822..2e6cb1983735f0 100644
--- a/src/core/dev_api/openvino/runtime/shared_buffer.hpp
+++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp
@@ -28,4 +28,19 @@ class SharedBuffer : public ov::AlignedBuffer {
     T _shared_object;
 };
 
+
+/// \brief SharedStreamBuffer class to store pointer to pre-allocated buffer and provide streambuf interface.
+template <typename T>
+class SharedStreamBuffer final : public std::stringbuf {
+public:
+    SharedStreamBuffer(char* data, size_t size, const T& shared_object) : _shared_object(shared_object) {
+        basic_streambuf::pubsetbuf(data, size);
+    }
+
+    SharedStreamBuffer() = delete;
+
+private:
+    T _shared_object;
+};
+
 } // namespace ov
diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp
index b14fe2abe18a7e..c9db11815f47e0 100644
--- a/src/inference/src/cache_manager.hpp
+++ b/src/inference/src/cache_manager.hpp
@@ -15,6 +15,8 @@
 #include <string>
 
 #include "openvino/util/file_util.hpp"
+#include "openvino/util/mmap_object.hpp"
+#include "openvino/runtime/shared_buffer.hpp"
 
 namespace ov {
 
@@ -78,7 +80,7 @@ class ICacheManager {
      * @param id Id of cache (hash of the model)
      * @param reader Lambda function to be called when input stream is created
      */
-    virtual void read_cache_entry(const std::string& id, StreamReader reader) = 0;
+    virtual void read_cache_entry(const std::string& id, bool enable_mmap, StreamReader reader) = 0;
 
     /**
      * @brief Callback when OpenVINO intends to remove cache entry
@@ -129,13 +131,20 @@ class FileStorageCacheManager final : public ICacheManager {
         writer(stream);
     }
 
-    void read_cache_entry(const std::string& id, StreamReader reader) override {
+    void read_cache_entry(const std::string& id, bool enable_mmap, StreamReader reader) override {
         // Fix the bug caused by pugixml, which may return unexpected results if the locale is different from "C".
         ScopedLocale plocal_C(LC_ALL, "C");
         auto blobFileName = getBlobFile(id);
         if (ov::util::file_exists(blobFileName)) {
-            std::ifstream stream(blobFileName, std::ios_base::binary);
-            reader(stream);
+            if (enable_mmap) {
+                auto mmap = ov::load_mmap_object(blobFileName);
+                SharedStreamBuffer<std::shared_ptr<ov::MappedMemory>> buf(mmap->data(), mmap->size(), mmap);
+                std::istream stream(&buf);
+                reader(stream);
+            } else {
+                std::ifstream stream(blobFileName, std::ios_base::binary);
+                reader(stream);
+            }
         }
     }
 
diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp
index 237c246ab38bdc..69ab5df145f773 100644
--- a/src/inference/src/dev/core_impl.cpp
+++ b/src/inference/src/dev/core_impl.cpp
@@ -751,7 +751,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
                                            parsed._config,
                                            ov::SoPtr<ov::IRemoteContext>{},
                                            cacheContent);
-        });
+        }, coreConfig.get_enable_mmap());
     } else {
         res = plugin.compile_model(model, parsed._config);
     }
@@ -780,7 +780,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
         std::unique_ptr<CacheGuardEntry> lock = cacheGuard.get_hash_lock(cacheContent.blobId);
         res = load_model_from_cache(cacheContent, plugin, parsed._config, context, [&]() {
             return compile_model_and_cache(plugin, model, parsed._config, context, cacheContent);
-        });
+        }, coreConfig.get_enable_mmap());
     } else {
         res = plugin.compile_model(model, context, parsed._config);
     }
@@ -807,7 +807,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::string& mod
            load_model_from_cache(cacheContent, plugin, parsed._config, ov::SoPtr<ov::IRemoteContext>{}, [&]() {
                auto model = read_model(model_path, std::string{});
                return compile_model_and_cache(plugin, model, parsed._config, {}, cacheContent);
-            });
+            }, coreConfig.get_enable_mmap());
     } else {
         compiled_model = plugin.compile_model(model_path, parsed._config);
     }
@@ -839,7 +839,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::string& mod
                                            parsed._config,
                                            ov::SoPtr<ov::IRemoteContext>{},
                                            cacheContent);
-        });
+        }, coreConfig.get_enable_mmap());
     } else {
         auto model = read_model(model_str, weights);
         compiled_model = plugin.compile_model(model, parsed._config);
@@ -1401,13 +1401,14 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
     ov::Plugin& plugin,
     const ov::AnyMap& config,
     const ov::SoPtr<ov::IRemoteContext>& context,
-    std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda) {
+    std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda,
+    bool enable_mmap) {
     ov::SoPtr<ov::ICompiledModel> compiled_model;
     struct HeaderException {};
 
     OPENVINO_ASSERT(cacheContent.cacheManager != nullptr);
     try {
-        cacheContent.cacheManager->read_cache_entry(cacheContent.blobId, [&](std::istream& networkStream) {
+        cacheContent.cacheManager->read_cache_entry(cacheContent.blobId, enable_mmap, [&](std::istream& networkStream) {
             OV_ITT_SCOPE(FIRST_INFERENCE,
                          ov::itt::domains::LoadTime,
                          "Core::load_model_from_cache::ReadStreamAndImport");
diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp
index 40f2a15bb725e0..fd3201eb28fb96 100644
--- a/src/inference/src/dev/core_impl.hpp
+++ b/src/inference/src/dev/core_impl.hpp
@@ -154,7 +154,8 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this
         ov::Plugin& plugin,
         const ov::AnyMap& config,
         const ov::SoPtr<ov::IRemoteContext>& context,
-        std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda);
+        std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda,
+        bool enable_mmap);
 
     bool device_supports_model_caching(const ov::Plugin& plugin) const;
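
Note (illustration only, not part of the patch): a minimal sketch of how the new
mmap-backed read path is expected to behave, assuming the dev-API headers touched
above and that ov::load_mmap_object() returns a std::shared_ptr<ov::MappedMemory>
exposing data() and size(), as used in the read_cache_entry() hunk. The helper
name read_blob_via_mmap is hypothetical.

#include <functional>
#include <istream>
#include <memory>
#include <string>

#include "openvino/runtime/shared_buffer.hpp"
#include "openvino/util/mmap_object.hpp"

// Hypothetical helper: reads a cached blob through an mmap-backed std::istream
// instead of std::ifstream, mirroring the enable_mmap branch added above.
inline void read_blob_via_mmap(const std::string& blob_file,
                               const std::function<void(std::istream&)>& reader) {
    auto mmap = ov::load_mmap_object(blob_file);
    // The shared_ptr is stored inside the stream buffer, so the mapping stays
    // alive for as long as the buffer (and the stream built on it) is in use.
    ov::SharedStreamBuffer<std::shared_ptr<ov::MappedMemory>> buf(mmap->data(), mmap->size(), mmap);
    std::istream stream(&buf);
    reader(stream);  // e.g. the plugin's import path consumes the stream
}

Keeping the shared object inside SharedStreamBuffer ties the lifetime of the mapping
to the stream buffer, which is why read_cache_entry() can hand the std::istream to the
reader callback without first copying the cached blob into process memory.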