diff --git a/src/core/dev_api/openvino/runtime/shared_buffer.hpp b/src/core/dev_api/openvino/runtime/shared_buffer.hpp index c4420b9e3fa822..d169a688c50e7d 100644 --- a/src/core/dev_api/openvino/runtime/shared_buffer.hpp +++ b/src/core/dev_api/openvino/runtime/shared_buffer.hpp @@ -8,7 +8,7 @@ namespace ov { -/// \brief SharedBuffer class to store pointer to pre-acclocated buffer. +/// \brief SharedBuffer class to store pointer to pre-allocated buffer. Owns the shared object. template class SharedBuffer : public ov::AlignedBuffer { public: @@ -28,4 +28,35 @@ class SharedBuffer : public ov::AlignedBuffer { T _shared_object; }; +/// \brief SharedStreamBuffer class to store pointer to pre-allocated buffer and provide streambuf interface. +/// Can return the pointer to the shared memory and its size. +class SharedStreamBuffer : public std::streambuf { +public: + SharedStreamBuffer(char* data, size_t size); + + // get data ptr and its size + char* data(); + size_t size(); + +protected: + // override std::streambuf methods + std::streamsize xsgetn(char* s, std::streamsize count) override; + int_type underflow() override; + int_type uflow() override; + std::streamsize showmanyc() override; + + char* m_data; + size_t m_size; + size_t m_offset; +}; + +/// \brief OwningSharedStreamBuffer is a SharedStreamBuffer which owns its shared object. 
+class OwningSharedStreamBuffer : public SharedStreamBuffer { +public: + OwningSharedStreamBuffer(char* data, size_t size, const std::shared_ptr& shared_obj); + +protected: + std::shared_ptr m_shared_obj; +}; + } // namespace ov diff --git a/src/core/src/shared_buffer.cpp b/src/core/src/shared_buffer.cpp new file mode 100644 index 00000000000000..6184b9957dbb2c --- /dev/null +++ b/src/core/src/shared_buffer.cpp @@ -0,0 +1,40 @@ +// Copyright (C) 2018-2024 Intel Corporation +// SPDX-License-Identifier: Apache-2.0 +// + +#include "openvino/runtime/shared_buffer.hpp" + +namespace ov { + SharedStreamBuffer::SharedStreamBuffer(char* data, size_t size) : m_data(data), m_size(size), m_offset(0) {} + + char* SharedStreamBuffer::data() { + return m_data; + } + + size_t SharedStreamBuffer::size() { + return m_size; + } + + std::streamsize SharedStreamBuffer::xsgetn(char* s, std::streamsize count) { + auto real_count = std::min(m_size - m_offset, count); + std::memcpy(s, m_data + m_offset, real_count); + m_offset += real_count; + return real_count; + } + + std::streambuf::int_type SharedStreamBuffer::underflow() { + return (m_size == m_offset) ? traits_type::eof() : traits_type::to_int_type(*(m_data + m_offset)); + } + + std::streambuf::int_type SharedStreamBuffer::uflow() { + return (m_size == m_offset) ? 
traits_type::eof() : traits_type::to_int_type(*(m_data + m_offset++)); + } + + std::streamsize SharedStreamBuffer::showmanyc() { + return m_size - m_offset; + } + + OwningSharedStreamBuffer::OwningSharedStreamBuffer(char* data, size_t size, const std::shared_ptr& shared_obj) + : SharedStreamBuffer(data, size), + m_shared_obj(shared_obj) {} +} // namespace ov diff --git a/src/inference/dev_api/openvino/runtime/iplugin.hpp b/src/inference/dev_api/openvino/runtime/iplugin.hpp index 8165e658c206f0..9cc05a7e8a2ee4 100644 --- a/src/inference/dev_api/openvino/runtime/iplugin.hpp +++ b/src/inference/dev_api/openvino/runtime/iplugin.hpp @@ -212,6 +212,12 @@ class OPENVINO_RUNTIME_API IPlugin : public std::enable_shared_from_this& get_executor_manager() const; + /** + * @brief Check if the plugin supports mmap for cached model reading. Returns false if the method is not overridden by the plugin. + * @return true if mmap is supported, false otherwise + */ + virtual bool support_mmap_for_caching() const; + virtual ~IPlugin() = default; protected: diff --git a/src/inference/src/cache_manager.hpp b/src/inference/src/cache_manager.hpp index b14fe2abe18a7e..dc58fb2624ec65 100644 --- a/src/inference/src/cache_manager.hpp +++ b/src/inference/src/cache_manager.hpp @@ -14,7 +14,9 @@ #include #include +#include "openvino/runtime/shared_buffer.hpp" #include "openvino/util/file_util.hpp" +#include "openvino/util/mmap_object.hpp" namespace ov { @@ -76,9 +78,10 @@ class ICacheManager { * Otherwise, model will not be read from cache and will be loaded as usual * * @param id Id of cache (hash of the model) + * @param enable_mmap true to read the model file via mmap, false to read it via ifstream * @param reader Lambda function to be called when input stream is created */ - virtual void read_cache_entry(const std::string& id, StreamReader reader) = 0; + virtual void read_cache_entry(const std::string& id, bool enable_mmap, StreamReader reader) = 0; /** * @brief Callback when OpenVINO intends to remove cache entry @@ -129,13 
+132,20 @@ class FileStorageCacheManager final : public ICacheManager { writer(stream); } - void read_cache_entry(const std::string& id, StreamReader reader) override { + void read_cache_entry(const std::string& id, bool enable_mmap, StreamReader reader) override { // Fix the bug caused by pugixml, which may return unexpected results if the locale is different from "C". ScopedLocale plocal_C(LC_ALL, "C"); auto blobFileName = getBlobFile(id); if (ov::util::file_exists(blobFileName)) { - std::ifstream stream(blobFileName, std::ios_base::binary); - reader(stream); + if (enable_mmap) { + auto mmap = ov::load_mmap_object(blobFileName); + OwningSharedStreamBuffer buf(mmap->data(), mmap->size(), mmap); + std::istream stream(&buf); + reader(stream); + } else { + std::ifstream stream(blobFileName, std::ios_base::binary); + reader(stream); + } } } diff --git a/src/inference/src/dev/core_impl.cpp b/src/inference/src/dev/core_impl.cpp index 237c246ab38bdc..9dd963281f300f 100644 --- a/src/inference/src/dev/core_impl.cpp +++ b/src/inference/src/dev/core_impl.cpp @@ -1354,7 +1354,7 @@ bool ov::CoreImpl::device_supports_internal_property(const ov::Plugin& plugin, c } bool ov::CoreImpl::device_supports_model_caching(const ov::Plugin& plugin) const { - return plugin.supports_model_caching(); + return plugin.supports_model_caching() == ov::Plugin::CachingMode::unsupported ? 
false : true; } bool ov::CoreImpl::device_supports_cache_dir(const ov::Plugin& plugin) const { @@ -1401,48 +1401,52 @@ ov::SoPtr ov::CoreImpl::load_model_from_cache( ov::Plugin& plugin, const ov::AnyMap& config, const ov::SoPtr& context, - std::function()> compile_model_lambda) { + std::function()> compile_model_lambda) const { ov::SoPtr compiled_model; struct HeaderException {}; OPENVINO_ASSERT(cacheContent.cacheManager != nullptr); try { - cacheContent.cacheManager->read_cache_entry(cacheContent.blobId, [&](std::istream& networkStream) { - OV_ITT_SCOPE(FIRST_INFERENCE, - ov::itt::domains::LoadTime, - "Core::load_model_from_cache::ReadStreamAndImport"); - try { - ov::CompiledBlobHeader header; - networkStream >> header; - if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { - // Original file is changed, don't use cache - OPENVINO_THROW("Original model file is changed"); - } - if (util::contains(plugin.get_property(ov::internal::supported_properties), - ov::internal::compiled_model_runtime_properties_supported.name())) { - ov::AnyMap compiled_model_runtime_properties = { - {ov::internal::compiled_model_runtime_properties.name(), - std::string(header.get_runtime_info())}}; - auto res = plugin.get_property(ov::internal::compiled_model_runtime_properties_supported.name(), - compiled_model_runtime_properties); - if (!res.as()) { - OPENVINO_THROW("Original model runtime properties have been changed, not supported anymore!"); + cacheContent.cacheManager->read_cache_entry( + cacheContent.blobId, + coreConfig.get_enable_mmap() && plugin.supports_model_caching() == ov::Plugin::CachingMode::mmap, + [&](std::istream& networkStream) { + OV_ITT_SCOPE(FIRST_INFERENCE, + ov::itt::domains::LoadTime, + "Core::load_model_from_cache::ReadStreamAndImport"); + try { + ov::CompiledBlobHeader header; + networkStream >> header; + if (header.get_file_info() != ov::ModelCache::calculate_file_info(cacheContent.modelPath)) { + // Original file is 
changed, don't use cache + OPENVINO_THROW("Original model file is changed"); } - } else { - if (header.get_openvino_version() != ov::get_openvino_version().buildNumber) { - // Build number mismatch, don't use this cache - OPENVINO_THROW("Version does not match"); + if (util::contains(plugin.get_property(ov::internal::supported_properties), + ov::internal::compiled_model_runtime_properties_supported.name())) { + ov::AnyMap compiled_model_runtime_properties = { + {ov::internal::compiled_model_runtime_properties.name(), + std::string(header.get_runtime_info())}}; + auto res = plugin.get_property(ov::internal::compiled_model_runtime_properties_supported.name(), + compiled_model_runtime_properties); + if (!res.as()) { + OPENVINO_THROW( + "Original model runtime properties have been changed, not supported anymore!"); + } + } else { + if (header.get_openvino_version() != ov::get_openvino_version().buildNumber) { + // Build number mismatch, don't use this cache + OPENVINO_THROW("Version does not match"); + } } + } catch (...) { + throw HeaderException(); } - } catch (...) { - throw HeaderException(); - } - ov::AnyMap update_config = config; - update_config[ov::loaded_from_cache.name()] = true; - compiled_model = context ? plugin.import_model(networkStream, context, update_config) - : plugin.import_model(networkStream, update_config); - }); + ov::AnyMap update_config = config; + update_config[ov::loaded_from_cache.name()] = true; + compiled_model = context ? 
plugin.import_model(networkStream, context, update_config) + : plugin.import_model(networkStream, update_config); + }); } catch (const HeaderException&) { // For these exceptions just remove old cache and set that import didn't work cacheContent.cacheManager->remove_cache_entry(cacheContent.blobId); diff --git a/src/inference/src/dev/core_impl.hpp b/src/inference/src/dev/core_impl.hpp index 40f2a15bb725e0..82e439d012711b 100644 --- a/src/inference/src/dev/core_impl.hpp +++ b/src/inference/src/dev/core_impl.hpp @@ -149,12 +149,12 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this& context, const CacheContent& cacheContent) const; - static ov::SoPtr load_model_from_cache( + ov::SoPtr load_model_from_cache( const CacheContent& cacheContent, ov::Plugin& plugin, const ov::AnyMap& config, const ov::SoPtr& context, - std::function()> compile_model_lambda); + std::function()> compile_model_lambda) const; bool device_supports_model_caching(const ov::Plugin& plugin) const; diff --git a/src/inference/src/dev/iplugin.cpp b/src/inference/src/dev/iplugin.cpp index 1049e39bee6f49..35610d8077d2b4 100644 --- a/src/inference/src/dev/iplugin.cpp +++ b/src/inference/src/dev/iplugin.cpp @@ -67,6 +67,10 @@ std::shared_ptr ov::IPlugin::get_core() const { return m_core.lock(); } +bool ov::IPlugin::support_mmap_for_caching() const { + return false; +} + const std::shared_ptr& ov::IPlugin::get_executor_manager() const { return m_executor_manager; } diff --git a/src/inference/src/dev/plugin.cpp b/src/inference/src/dev/plugin.cpp index 288389c46db859..cc8c5099241c8f 100644 --- a/src/inference/src/dev/plugin.cpp +++ b/src/inference/src/dev/plugin.cpp @@ -101,10 +101,14 @@ ov::Any ov::Plugin::get_property(const std::string& name, const AnyMap& argument return {m_ptr->get_property(name, arguments), {m_so}}; } -bool ov::Plugin::supports_model_caching() const { - bool supported(false); - supported = util::contains(get_property(ov::supported_properties), 
ov::device::capabilities) && - util::contains(get_property(ov::device::capabilities), ov::device::capability::EXPORT_IMPORT) && - util::contains(get_property(ov::internal::supported_properties), ov::internal::caching_properties); +ov::Plugin::CachingMode ov::Plugin::supports_model_caching() const { + ov::Plugin::CachingMode supported = ov::Plugin::CachingMode::unsupported; + if (util::contains(get_property(ov::supported_properties), ov::device::capabilities) && + util::contains(get_property(ov::device::capabilities), ov::device::capability::EXPORT_IMPORT) && + util::contains(get_property(ov::internal::supported_properties), ov::internal::caching_properties)) { + bool support_mmap = false; + OV_PLUGIN_CALL_STATEMENT(support_mmap = m_ptr->support_mmap_for_caching();); + supported = support_mmap ? ov::Plugin::CachingMode::mmap : ov::Plugin::CachingMode::legacy; + } return supported; } diff --git a/src/inference/src/dev/plugin.hpp b/src/inference/src/dev/plugin.hpp index 9eeed484840fff..9cc1a1a702cb02 100644 --- a/src/inference/src/dev/plugin.hpp +++ b/src/inference/src/dev/plugin.hpp @@ -74,7 +74,13 @@ class Plugin { T get_property(const ov::Property& property, const AnyMap& arguments) const { return get_property(property.name(), arguments).template as(); } - bool supports_model_caching() const; + + enum class CachingMode { + legacy, + mmap, + unsupported + }; + CachingMode supports_model_caching() const; }; } // namespace ov diff --git a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp index 28a20fa737da76..9f7af453ac99d3 100644 --- a/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp +++ b/src/plugins/intel_gpu/include/intel_gpu/plugin/plugin.hpp @@ -64,6 +64,7 @@ class Plugin : public ov::IPlugin { const ov::AnyMap& properties) const override; ov::SoPtr create_context(const ov::AnyMap& remote_properties) const override; ov::SoPtr get_default_context(const ov::AnyMap& remote_properties) 
const override; + bool support_mmap_for_caching() const override; }; } // namespace intel_gpu diff --git a/src/plugins/intel_gpu/src/plugin/plugin.cpp b/src/plugins/intel_gpu/src/plugin/plugin.cpp index 7ee587e612ad3d..a28769eaa9085c 100644 --- a/src/plugins/intel_gpu/src/plugin/plugin.cpp +++ b/src/plugins/intel_gpu/src/plugin/plugin.cpp @@ -219,6 +219,10 @@ ov::SoPtr Plugin::get_default_context(const AnyMap& params) return get_default_context(device_id); } +bool Plugin::support_mmap_for_caching() const { + return true; +} + void Plugin::set_property(const ov::AnyMap &config) { auto update_config = [](ExecutionConfig& config, const ov::AnyMap& user_config) { config.set_user_property(user_config);