Skip to content

Commit

Permalink
feat: GPU ORT (#371)
Browse files Browse the repository at this point in the history
  • Loading branch information
MistEO authored Oct 1, 2024
1 parent ef707e4 commit e00f5f8
Show file tree
Hide file tree
Showing 16 changed files with 210 additions and 13 deletions.
6 changes: 6 additions & 0 deletions include/MaaFramework/MaaDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,12 @@ typedef MaaOption MaaResOption;
enum MaaResOptionEnum
{
    MaaResOption_Invalid = 0,

    /// Select the inference device used when running models.
    /// Defaults to INT32_MAX, which means run on the CPU.
    /// Set this option before loading the model; it does not affect
    /// sessions that have already been created.
    ///
    /// value: int32_t, eg: 0; val_size: sizeof(int32_t)
    MaaResOption_GpuId = 1,
};

typedef MaaOption MaaCtrlOption;
Expand Down
16 changes: 16 additions & 0 deletions source/MaaFramework/Resource/OCRResMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,22 @@ OCRResMgr::OCRResMgr()
option_.UseOrtBackend();
}

// Configure the OCR runtime option to run inference on the CPU backend.
// Always reports success.
bool OCRResMgr::use_cpu()
{
    LogInfo;

    option_.UseCpu();

    return true;
}

// Configure the OCR runtime option to run inference on a GPU.
// @param device_id index of the GPU device to use.
// Always reports success.
bool OCRResMgr::use_gpu(int device_id)
{
    LogInfo << VAR(device_id);

    option_.UseGpu(device_id);

    return true;
}

bool OCRResMgr::lazy_load(const std::filesystem::path& path, bool is_base)
{
LogFunc << VAR(path) << VAR(is_base);
Expand Down
3 changes: 3 additions & 0 deletions source/MaaFramework/Resource/OCRResMgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,9 @@ class OCRResMgr : public NonCopyable
{
public:
OCRResMgr();

bool use_cpu();
bool use_gpu(int device_id);
bool lazy_load(const std::filesystem::path& path, bool is_base);
void clear();

Expand Down
99 changes: 98 additions & 1 deletion source/MaaFramework/Resource/ONNXResMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,109 @@

#include <filesystem>
#include <ranges>
#include <unordered_set>

#ifdef _WIN32
#include "Utils/SafeWindows.hpp"
#endif

#if __has_include(<onnxruntime/dml_provider_factory.h>)
#define MAA_WITH_DML
#include <onnxruntime/dml_provider_factory.h>
#endif

#if __has_include(<onnxruntime/coreml_provider_factory.h>)
#define MAA_WITH_COREML
#include <onnxruntime/coreml_provider_factory.h>
#endif

#include "Utils/Logger.h"
#include "Utils/Platform.h"

MAA_RES_NS_BEGIN

// Destructor.
//
// When a GPU execution provider was enabled, the ORT sessions and the
// session options are deliberately moved into heap objects that are never
// deleted, so their destructors never run.
ONNXResMgr::~ONNXResMgr()
{
    if (gpu_device_id_) {
        LogWarn << "GPU is enabled, leaking resources";

        // FIXME: intentionally leak ort objects to avoid crash (double free?)
        // https://github.com/microsoft/onnxruntime/issues/15174
        for (auto& session : classifiers_ | std::views::values) {
            // Move the session out of the shared_ptr into a leaked heap object;
            // the shared_ptr then destroys only an empty (nullptr) session.
            auto leak_session = new Ort::Session(nullptr);
            *leak_session = std::move(*session);
        }
        for (auto& session : detectors_ | std::views::values) {
            auto leak_session = new Ort::Session(nullptr);
            *leak_session = std::move(*session);
        }

        // Leak the session options for the same reason.
        auto leak_options = new Ort::SessionOptions(nullptr);
        *leak_options = std::move(options_);
    }
}

// Reset the session options to their defaults so inference runs on the CPU,
// and clear any previously selected GPU device. Always reports success.
bool ONNXResMgr::use_cpu()
{
    LogInfo;

    gpu_device_id_.reset();
    options_ = {};

    return true;
}

// Configure ONNX Runtime session options to run on the given GPU device.
//
// Tries the available execution providers in priority order: CUDA, then
// DirectML (Windows), then CoreML (macOS). On success, remembers the
// device id so repeated calls with the same id are no-ops.
//
// @param device_id index of the device to use (CoreML ignores it and uses
//                  default provider flags).
// @return false if no supported provider is available or appending one fails.
bool ONNXResMgr::use_gpu(int device_id)
{
    LogInfo << VAR(device_id);

    if (gpu_device_id_ && *gpu_device_id_ == device_id) {
        LogWarn << "GPU is already enabled";
        return true;
    }
    // Start from fresh options so a previously appended provider does not linger.
    options_ = {};

    auto all_providers_vec = Ort::GetAvailableProviders();
    std::unordered_set<std::string> all_providers(
        std::make_move_iterator(all_providers_vec.begin()),
        std::make_move_iterator(all_providers_vec.end()));
    LogInfo << VAR(all_providers);

    if (all_providers.contains("CUDAExecutionProvider")) {
        OrtCUDAProviderOptions cuda_options {};
        cuda_options.device_id = device_id;
        options_.AppendExecutionProvider_CUDA(cuda_options);

        LogInfo << "Using CUDA execution provider with device_id " << device_id;
    }
#ifdef MAA_WITH_DML
    else if (all_providers.contains("DmlExecutionProvider")) {
        auto status = OrtSessionOptionsAppendExecutionProvider_DML(options_, device_id);
        // Ort::Status takes ownership of the OrtStatus* and releases it.
        if (!Ort::Status(status).IsOK()) {
            LogError << "Failed to append DML execution provider with device_id " << device_id;
            return false;
        }
        LogInfo << "Using DML execution provider with device_id " << device_id;
    }
#endif
#ifdef MAA_WITH_COREML
    else if (all_providers.contains("CoreMLExecutionProvider")) {
        // The second argument is a coreml_flags bitmask, not a device id; 0 = defaults.
        // Rely on Ort::SessionOptions' implicit conversion to OrtSessionOptions*
        // (same as the DML call above) instead of a C-style cast.
        auto status = OrtSessionOptionsAppendExecutionProvider_CoreML(options_, 0);
        if (!Ort::Status(status).IsOK()) {
            LogError << "Failed to append CoreML execution provider";
            return false;
        }
        LogInfo << "Using CoreML execution provider";
    }
#endif
    else {
        LogError << "No supported execution provider found";
        return false;
    }

    gpu_device_id_ = device_id;
    return true;
}

bool ONNXResMgr::lazy_load(const std::filesystem::path& path, bool is_base)
{
LogFunc << VAR(path) << VAR(is_base);
Expand Down Expand Up @@ -71,7 +168,7 @@ std::shared_ptr<Ort::Session> ONNXResMgr::load(const std::string& name, const st
}

LogTrace << VAR(path);
Ort::Session session(m_env, path.c_str(), m_options);
Ort::Session session(env_, path.c_str(), options_);
return std::make_shared<Ort::Session>(std::move(session));
}

Expand Down
11 changes: 9 additions & 2 deletions source/MaaFramework/Resource/ONNXResMgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

#include <filesystem>
#include <memory>
#include <optional>

#include <onnxruntime/onnxruntime_cxx_api.h>

Expand All @@ -17,7 +18,12 @@ class ONNXResMgr : public NonCopyable
inline static const std::filesystem::path kClassifierDir = "classify";
inline static const std::filesystem::path kDetectorDir = "detect";

~ONNXResMgr();

public:
bool use_cpu();
bool use_gpu(int device_id);

bool lazy_load(const std::filesystem::path& path, bool is_base);
void clear();

Expand All @@ -31,8 +37,9 @@ class ONNXResMgr : public NonCopyable
std::vector<std::filesystem::path> classifier_roots_;
std::vector<std::filesystem::path> detector_roots_;

Ort::Env m_env;
Ort::SessionOptions m_options;
Ort::Env env_;
Ort::SessionOptions options_;
std::optional<int> gpu_device_id_;

mutable std::unordered_map<std::string, std::shared_ptr<Ort::Session>> classifiers_;
mutable std::unordered_map<std::string, std::shared_ptr<Ort::Session>> detectors_;
Expand Down
37 changes: 33 additions & 4 deletions source/MaaFramework/Resource/ResourceMgr.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,16 @@ ResourceMgr::~ResourceMgr()

// Dispatch a resource option write to its dedicated handler.
// Unknown keys are logged and rejected.
bool ResourceMgr::set_option(MaaResOption key, MaaOptionValue value, MaaOptionValueSize val_size)
{
    LogFunc << VAR(key) << VAR_VOIDP(value) << VAR(val_size);

    if (key == MaaResOption_GpuId) {
        return set_gpu_id(value, val_size);
    }

    LogError << "Unknown key" << VAR(key) << VAR(value);
    return false;
}

MaaResId ResourceMgr::post_path(const std::filesystem::path& path)
Expand Down Expand Up @@ -243,6 +248,30 @@ CustomActionSession ResourceMgr::custom_action(const std::string& name) const
return it->second;
}

// Apply the MaaResOption_GpuId option to both inference backends.
//
// @param value pointer to an int32_t device id; INT32_MAX selects the CPU.
// @param val_size must equal sizeof(int32_t).
// @return true if the option was parsed and applied.
bool ResourceMgr::set_gpu_id(MaaOptionValue value, MaaOptionValueSize val_size)
{
    LogFunc << VAR_VOIDP(value) << VAR(val_size);

    // Reject a null pointer as well as a mismatched size before dereferencing.
    if (!value || val_size != sizeof(int32_t)) {
        LogError << "invalid value or size" << VAR_VOIDP(value) << VAR(val_size);
        return false;
    }

    // Read as int32_t to match the documented option type exactly.
    int32_t gpu_id = *reinterpret_cast<const int32_t*>(value);
    LogInfo << VAR(gpu_id);

    if (gpu_id == INT32_MAX) {
        onnx_res_.use_cpu();
        ocr_res_.use_cpu();
    }
    else {
        onnx_res_.use_gpu(gpu_id);
        ocr_res_.use_gpu(gpu_id);
    }

    return true;
}

bool ResourceMgr::run_load(typename AsyncRunner<std::filesystem::path>::Id id, std::filesystem::path path)
{
LogFunc << VAR(id) << VAR(path);
Expand Down
2 changes: 2 additions & 0 deletions source/MaaFramework/Resource/ResourceMgr.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,8 @@ class ResourceMgr : public MaaResource
CustomActionSession custom_action(const std::string& name) const;

private:
bool set_gpu_id(MaaOptionValue value, MaaOptionValueSize val_size);

bool run_load(typename AsyncRunner<std::filesystem::path>::Id id, std::filesystem::path path);
bool load(const std::filesystem::path& path);
bool check_stop();
Expand Down
2 changes: 0 additions & 2 deletions source/MaaFramework/Vision/NeuralNetworkClassifier.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,6 @@ NeuralNetworkClassifier::Result NeuralNetworkClassifier::classify() const
cv::Size input_image_size(static_cast<int>(input_shape[3]), static_cast<int>(input_shape[2]));
cv::resize(image, image, input_image_size, 0, 0, cv::INTER_AREA);
std::vector<float> input = image_to_tensor(image);

// TODO: GPU
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);

Ort::Value input_tensor =
Expand Down
2 changes: 0 additions & 2 deletions source/MaaFramework/Vision/NeuralNetworkDetector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,6 @@ NeuralNetworkDetector::ResultsVec NeuralNetworkDetector::detect() const
cv::Size input_image_size(static_cast<int>(input_shape[3]), static_cast<int>(input_shape[2]));
cv::resize(image, image, input_image_size, 0, 0, cv::INTER_AREA);
std::vector<float> input = image_to_tensor(image);

// TODO: GPU
auto memory_info = Ort::MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeDefault);

Ort::Value input_tensor =
Expand Down
2 changes: 1 addition & 1 deletion source/MaaFramework/Vision/OCRer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ OCRer::ResultsVec OCRer::predict_det_and_rec(const cv::Mat& image_roi) const
fastdeploy::vision::OCRResult ocr_result;
bool ret = ocrer_->Predict(image_roi, &ocr_result);
if (!ret) {
LogWarn << "inferencer return false" << VAR(ocrer_) << VAR(image_) << VAR(image_roi);
LogWarn << "predict return false" << VAR(ocrer_) << VAR(image_) << VAR(image_roi);
return {};
}

Expand Down
2 changes: 2 additions & 0 deletions source/MaaProjectInterface/Impl/Configurator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,8 @@ std::optional<RuntimeParam> Configurator::generate_runtime() const
}
}

runtime.gpu = config_.gpu;

return runtime;
}

Expand Down
1 change: 1 addition & 0 deletions source/MaaProjectInterface/Impl/Runner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ bool Runner::run(
}

auto resource_handle = MaaResourceCreate(notify, notify_trans_arg);
MaaResourceSetOption(resource_handle, MaaResOption_GpuId, const_cast<int32_t*>(&param.gpu), sizeof(int32_t));

MaaId cid = MaaControllerPostConnection(controller_handle);
MaaId rid = 0;
Expand Down
10 changes: 10 additions & 0 deletions source/binding/Python/maa/define.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,7 @@ class MaaStatusEnum(IntEnum):
MaaOption = ctypes.c_int32
MaaGlobalOption = MaaOption
MaaCtrlOption = MaaOption
MaaResOption = MaaOption


class MaaGlobalOptionEnum:
Expand Down Expand Up @@ -104,6 +105,15 @@ class MaaCtrlOptionEnum:
Recording = 5


class MaaResOptionEnum:
    Invalid = 0

    # Select the inference device; the default is INT32_MAX, which means CPU.
    # Set this option before loading the model.
    # value: int32_t, eg: 0; val_size: sizeof(int32_t)
    GpuId = 1


MaaAdbScreencapMethod = ctypes.c_uint64


Expand Down
23 changes: 23 additions & 0 deletions source/binding/Python/maa/resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,21 @@ def loaded(self) -> bool:
def clear(self) -> bool:
    """Drop everything loaded into this resource handle."""
    result = Library.framework.MaaResourceClear(self._handle)
    return bool(result)

def set_gpu(self, device_id: int) -> bool:
    """Select the GPU device used for inference.

    Set this before loading the model. Returns True on success.
    """
    value = ctypes.c_int32(device_id)
    ok = Library.framework.MaaResourceSetOption(
        self._handle,
        MaaResOptionEnum.GpuId,
        ctypes.pointer(value),
        ctypes.sizeof(ctypes.c_int32),
    )
    return bool(ok)

def set_cpu(self) -> bool:
    """Run inference on the CPU.

    Passes INT32_MAX as the device id, which the framework treats as CPU.
    """
    INT32_MAX = 2**31 - 1  # sentinel meaning CPU
    return self.set_gpu(INT32_MAX)

def register_custom_recognition(
self, name: str, recognition: "CustomRecognition" # type: ignore
) -> bool:
Expand Down Expand Up @@ -189,6 +204,14 @@ def _set_api_properties():
MaaStringBufferHandle,
]

Library.framework.MaaResourceSetOption.restype = MaaBool
Library.framework.MaaResourceSetOption.argtypes = [
MaaResourceHandle,
MaaResOption,
MaaOptionValue,
MaaOptionValueSize,
]

Library.framework.MaaResourceRegisterCustomRecognition.restype = MaaBool
Library.framework.MaaResourceRegisterCustomRecognition.argtypes = [
MaaResourceHandle,
Expand Down
4 changes: 3 additions & 1 deletion source/include/ProjectInterface/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,9 @@ struct Configuration
Win32Config win32;
std::string resource;
std::vector<Task> task;
int32_t gpu = INT32_MAX;

MEO_JSONIZATION(controller, MEO_OPT adb, MEO_OPT win32, resource, task);
MEO_JSONIZATION(controller, MEO_OPT adb, MEO_OPT win32, resource, task, MEO_OPT gpu);
};

struct RuntimeParam
Expand Down Expand Up @@ -184,6 +185,7 @@ struct RuntimeParam
std::vector<std::string> resource_path;

std::vector<Task> task;
int32_t gpu = INT32_MAX;
};

struct CustomRecognitionSession
Expand Down
3 changes: 3 additions & 0 deletions test/python/binding_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,10 @@ def run(

def api_test():
r1 = Resource()
r1.set_gpu(0)
r1.set_gpu(1)
r2 = Resource()
r2.set_cpu()
r2.post_path("C:/_maafw_testing_/aaabbbccc").wait()
t1 = Tasker()
t2 = Tasker()
Expand Down

0 comments on commit e00f5f8

Please sign in to comment.