Skip to content

Commit

Permalink
Enable mmap for reading model from cache
Browse files Browse the repository at this point in the history
  • Loading branch information
olpipi committed Sep 19, 2024
1 parent b368c31 commit 57a4efa
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 11 deletions.
15 changes: 15 additions & 0 deletions src/core/dev_api/openvino/runtime/shared_buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,4 +28,19 @@ class SharedBuffer : public ov::AlignedBuffer {
T _shared_object;
};


/// \brief SharedStreamBuffer class to store pointer to pre-allocated buffer and provide streambuf interface.
///
/// The buffer is exposed read-only: characters are served directly from the external memory, nothing is
/// copied into an internal string. The class keeps a copy of \p shared_object so that whatever owns the
/// memory (for example a shared pointer to a mapped file) stays alive for the lifetime of the buffer.
///
/// The read position is tracked with a size_t offset instead of the std::streambuf get area and instead of
/// std::stringbuf::setbuf(): the effect of setbuf() on a string buffer is implementation-defined (some
/// standard libraries ignore the call, which leaves the stream empty), and an offset also avoids depending
/// on how a particular standard library represents the get area for very large buffers.
///
/// \tparam T Type of the object that keeps the underlying memory alive; must be copy constructible.
template <typename T>
class SharedStreamBuffer final : public std::streambuf {
public:
    /// \brief Creates a read-only stream buffer over [data, data + size).
    /// \param data Pointer to the first byte of the external buffer (may be null, then the buffer is empty)
    /// \param size Number of readable bytes starting at \p data
    /// \param shared_object Object copied into the buffer to keep the external memory alive
    SharedStreamBuffer(char* data, size_t size, const T& shared_object)
        : _data(data),
          _size(data ? size : 0),
          _offset(0),
          _shared_object(shared_object) {}

    SharedStreamBuffer() = delete;

protected:
    /// \brief Bulk read: copies up to \p count characters to \p dst and advances the read position.
    std::streamsize xsgetn(char_type* dst, std::streamsize count) override {
        if (count <= 0) {
            return 0;
        }
        const size_t remaining = _size - _offset;
        // Compare in a wide unsigned type so that neither operand is truncated before the comparison.
        const size_t to_copy =
            static_cast<unsigned long long>(count) < remaining ? static_cast<size_t>(count) : remaining;
        traits_type::copy(dst, _data + _offset, to_copy);
        _offset += to_copy;
        return static_cast<std::streamsize>(to_copy);
    }

    /// \brief Returns the current character without consuming it, or eof at the end of the buffer.
    int_type underflow() override {
        return _offset == _size ? traits_type::eof() : traits_type::to_int_type(_data[_offset]);
    }

    /// \brief Returns the current character and consumes it, or eof at the end of the buffer.
    /// Must be overridden because the default implementation dereferences the (unused) get area.
    int_type uflow() override {
        return _offset == _size ? traits_type::eof() : traits_type::to_int_type(_data[_offset++]);
    }

    /// \brief Number of characters that can still be read, or -1 when the end has been reached.
    std::streamsize showmanyc() override {
        const size_t remaining = _size - _offset;
        return remaining != 0 ? static_cast<std::streamsize>(remaining) : std::streamsize(-1);
    }

    /// \brief Steps the read position back by one character (supports unget()/putback()).
    /// The memory is never modified, so putting back a character different from the stored one fails.
    int_type pbackfail(int_type c = traits_type::eof()) override {
        if (_offset == 0) {
            return traits_type::eof();
        }
        if (!traits_type::eq_int_type(c, traits_type::eof()) &&
            !traits_type::eq(traits_type::to_char_type(c), _data[_offset - 1])) {
            return traits_type::eof();
        }
        --_offset;
        return traits_type::not_eof(c);
    }

    /// \brief Relative seek of the read position; only the input sequence is supported.
    /// \return New absolute position, or pos_type(off_type(-1)) if the target is outside [0, size].
    pos_type seekoff(off_type off,
                     std::ios_base::seekdir dir,
                     std::ios_base::openmode which = std::ios_base::in) override {
        const pos_type failure = pos_type(off_type(-1));
        if ((which & std::ios_base::in) == 0) {
            return failure;
        }
        const off_type end = static_cast<off_type>(_size);
        off_type base = 0;
        if (dir == std::ios_base::beg) {
            base = 0;
        } else if (dir == std::ios_base::cur) {
            base = static_cast<off_type>(_offset);
        } else if (dir == std::ios_base::end) {
            base = end;
        } else {
            return failure;
        }
        // Range check is written so that base + off is only evaluated when it cannot overflow.
        if (off < -base || off > end - base) {
            return failure;
        }
        _offset = static_cast<size_t>(base + off);
        return pos_type(base + off);
    }

    /// \brief Absolute seek of the read position; only the input sequence is supported.
    pos_type seekpos(pos_type pos, std::ios_base::openmode which = std::ios_base::in) override {
        return seekoff(off_type(pos), std::ios_base::beg, which);
    }

private:
    char* _data;
    size_t _size;
    size_t _offset;
    T _shared_object;
};

} // namespace ov
17 changes: 13 additions & 4 deletions src/inference/src/cache_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
#include <string>

#include "openvino/util/file_util.hpp"
#include "openvino/util/mmap_object.hpp"
#include "openvino/runtime/shared_buffer.hpp"

namespace ov {

Expand Down Expand Up @@ -78,7 +80,7 @@ class ICacheManager {
* @param id Id of cache (hash of the model)
* @param reader Lambda function to be called when input stream is created
*/
virtual void read_cache_entry(const std::string& id, StreamReader reader) = 0;
virtual void read_cache_entry(const std::string& id, bool enable_mmap, StreamReader reader) = 0;

/**
* @brief Callback when OpenVINO intends to remove cache entry
Expand Down Expand Up @@ -129,13 +131,20 @@ class FileStorageCacheManager final : public ICacheManager {
writer(stream);
}

void read_cache_entry(const std::string& id, StreamReader reader) override {
/**
 * @brief Opens the blob file that belongs to a cache entry and passes an input stream over it to `reader`.
 *
 * `reader` is not called when the blob file does not exist.
 *
 * @param id Id of the cache entry, used to obtain the blob file name via getBlobFile()
 * @param enable_mmap If true, the blob is loaded with ov::load_mmap_object() and read through a
 *                    SharedStreamBuffer; otherwise it is read through a binary std::ifstream
 * @param reader Callback invoked with the created input stream
 */
void read_cache_entry(const std::string& id, bool enable_mmap, StreamReader reader) override {
// Fix the bug caused by pugixml, which may return unexpected results if the locale is different from "C".
ScopedLocale plocal_C(LC_ALL, "C");
auto blobFileName = getBlobFile(id);
if (ov::util::file_exists(blobFileName)) {
// NOTE(review): in the rendered diff the next two lines look like the pre-change implementation that is
// superseded by the enable_mmap branch below - confirm against the actual file.
std::ifstream stream(blobFileName, std::ios_base::binary);
reader(stream);
if (enable_mmap) {
// Map the blob file and expose the mapped bytes as a std::istream.
auto mmap = ov::load_mmap_object(blobFileName);
// The mmap shared pointer is handed to the buffer as its shared object; buf is declared before
// stream, so it outlives the stream that reads from it.
SharedStreamBuffer<std::shared_ptr<MappedMemory>> buf(mmap->data(), mmap->size(), mmap);
std::istream stream(&buf);
reader(stream);
} else {
// Regular file stream path, opened in binary mode.
std::ifstream stream(blobFileName, std::ios_base::binary);
reader(stream);
}
}
}

Expand Down
13 changes: 7 additions & 6 deletions src/inference/src/dev/core_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -751,7 +751,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
parsed._config,
ov::SoPtr<ov::IRemoteContext>{},
cacheContent);
});
}, coreConfig.get_enable_mmap());
} else {
res = plugin.compile_model(model, parsed._config);
}
Expand Down Expand Up @@ -780,7 +780,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::shared_ptr<
std::unique_ptr<CacheGuardEntry> lock = cacheGuard.get_hash_lock(cacheContent.blobId);
res = load_model_from_cache(cacheContent, plugin, parsed._config, context, [&]() {
return compile_model_and_cache(plugin, model, parsed._config, context, cacheContent);
});
}, coreConfig.get_enable_mmap());
} else {
res = plugin.compile_model(model, context, parsed._config);
}
Expand All @@ -807,7 +807,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::string& mod
load_model_from_cache(cacheContent, plugin, parsed._config, ov::SoPtr<ov::IRemoteContext>{}, [&]() {
auto model = read_model(model_path, std::string{});
return compile_model_and_cache(plugin, model, parsed._config, {}, cacheContent);
});
}, coreConfig.get_enable_mmap());
} else {
compiled_model = plugin.compile_model(model_path, parsed._config);
}
Expand Down Expand Up @@ -839,7 +839,7 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::compile_model(const std::string& mod
parsed._config,
ov::SoPtr<ov::IRemoteContext>{},
cacheContent);
});
}, coreConfig.get_enable_mmap());
} else {
auto model = read_model(model_str, weights);
compiled_model = plugin.compile_model(model, parsed._config);
Expand Down Expand Up @@ -1401,13 +1401,14 @@ ov::SoPtr<ov::ICompiledModel> ov::CoreImpl::load_model_from_cache(
ov::Plugin& plugin,
const ov::AnyMap& config,
const ov::SoPtr<ov::IRemoteContext>& context,
std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda) {
std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda,
bool enable_mmap) {
ov::SoPtr<ov::ICompiledModel> compiled_model;
struct HeaderException {};

OPENVINO_ASSERT(cacheContent.cacheManager != nullptr);
try {
cacheContent.cacheManager->read_cache_entry(cacheContent.blobId, [&](std::istream& networkStream) {
cacheContent.cacheManager->read_cache_entry(cacheContent.blobId, enable_mmap, [&](std::istream& networkStream) {
OV_ITT_SCOPE(FIRST_INFERENCE,
ov::itt::domains::LoadTime,
"Core::load_model_from_cache::ReadStreamAndImport");
Expand Down
3 changes: 2 additions & 1 deletion src/inference/src/dev/core_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,8 @@ class CoreImpl : public ov::ICore, public std::enable_shared_from_this<ov::ICore
ov::Plugin& plugin,
const ov::AnyMap& config,
const ov::SoPtr<ov::IRemoteContext>& context,
std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda);
std::function<ov::SoPtr<ov::ICompiledModel>()> compile_model_lambda,
bool enable_mmap);

bool device_supports_model_caching(const ov::Plugin& plugin) const;

Expand Down

0 comments on commit 57a4efa

Please sign in to comment.