diff --git a/modules/calibrator.cpp b/modules/calibrator.cpp index 773a241..bfdf307 100644 --- a/modules/calibrator.cpp +++ b/modules/calibrator.cpp @@ -27,13 +27,15 @@ SOFTWARE. #include #include #include +#include +#include +#include Int8EntropyCalibrator::Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages, - const std::string& calibImagesPath, - const std::string& calibTableFilePath, - const uint64_t& inputSize, const uint32_t& inputH, - const uint32_t& inputW, const std::string& inputBlobName, - const std::string &s_net_type_) : + const std::string& calibImagesPath, + const std::string& calibTableFilePath, + const uint64_t& inputSize, const uint32_t& inputH, + const uint32_t& inputW, const std::string& inputBlobName, const std::string &s_net_type_) : m_BatchSize(batchSize), m_InputH(inputH), m_InputW(inputW), @@ -42,14 +44,16 @@ Int8EntropyCalibrator::Int8EntropyCalibrator(const uint32_t& batchSize, const st m_InputBlobName(inputBlobName), m_CalibTableFilePath(calibTableFilePath), m_ImageIndex(0), - _s_net_type(s_net_type_) + _s_net_type(s_net_type_) { if (!fileExists(m_CalibTableFilePath, false)) { - m_ImageList = loadImageList(calibImages, calibImagesPath); - m_ImageList.resize(static_cast(m_ImageList.size() / m_BatchSize) * m_BatchSize); - std::random_shuffle(m_ImageList.begin(), m_ImageList.end(), - [](int i) { return rand() % i; }); + std::random_device rng; + std::mt19937 urng(rng()); + + m_ImageList = loadImageList(calibImages, calibImagesPath); + m_ImageList.resize(static_cast(m_ImageList.size() / m_BatchSize) * m_BatchSize); + std::shuffle(m_ImageList.begin(), m_ImageList.end(), urng); } NV_CUDA_CHECK(cudaMalloc(&m_DeviceInput, m_InputCount * sizeof(float))); @@ -57,7 +61,7 @@ Int8EntropyCalibrator::Int8EntropyCalibrator(const uint32_t& batchSize, const st Int8EntropyCalibrator::~Int8EntropyCalibrator() { NV_CUDA_CHECK(cudaFree(m_DeviceInput)); } -bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int nbBindings) +bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int /*nbBindings*/) noexcept { if (m_ImageIndex + m_BatchSize >= m_ImageList.size()) return false; @@ -69,16 +73,16 @@ bool Int8EntropyCalibrator::getBatch(void* bindings[], const char* names[], int } m_ImageIndex += m_BatchSize; - cv::Mat trtInput = blobFromDsImages(dsImages, m_InputH, m_InputW); + blobFromDsImages(dsImages, m_blob, m_InputH, m_InputW); - NV_CUDA_CHECK(cudaMemcpy(m_DeviceInput, trtInput.ptr(0), m_InputCount * sizeof(float), + NV_CUDA_CHECK(cudaMemcpy(m_DeviceInput, m_blob.ptr(0), m_InputCount * sizeof(float), cudaMemcpyHostToDevice)); assert(!strcmp(names[0], m_InputBlobName.c_str())); bindings[0] = m_DeviceInput; return true; } -const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) +const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) noexcept { void* output; m_CalibrationCache.clear(); @@ -105,10 +109,10 @@ const void* Int8EntropyCalibrator::readCalibrationCache(size_t& length) return output; } -void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length) +void Int8EntropyCalibrator::writeCalibrationCache(const void* cache, size_t length) noexcept { assert(!m_CalibTableFilePath.empty()); std::ofstream output(m_CalibTableFilePath, std::ios::binary); output.write(reinterpret_cast(cache), length); output.close(); -} \ No newline at end of file +} diff --git a/modules/calibrator.h b/modules/calibrator.h index 9c09639..09b7970 100644 --- a/modules/calibrator.h +++ b/modules/calibrator.h @@ -35,13 +35,13 @@ class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 Int8EntropyCalibrator(const uint32_t& batchSize, const std::string& calibImages, const std::string& calibImagesPath, const std::string& calibTableFilePath, const uint64_t& inputSize, const uint32_t& inputH, const uint32_t& inputW, - const std::string& inputBlobName,const std::string &s_net_type_); + const std::string& inputBlobName, const std::string &s_net_type_); virtual ~Int8EntropyCalibrator(); - int getBatchSize() const override { return m_BatchSize; } - bool getBatch(void* bindings[], const char* names[], int nbBindings) override; - const void* readCalibrationCache(size_t& length) override; - void writeCalibrationCache(const void* cache, size_t length) override; + int getBatchSize() const noexcept override { return m_BatchSize; } + bool getBatch(void* bindings[], const char* names[], int nbBindings) noexcept override; + const void* readCalibrationCache(size_t& length) noexcept override; + void writeCalibrationCache(const void* cache, size_t length) noexcept override; private: const uint32_t m_BatchSize; @@ -50,13 +50,15 @@ class Int8EntropyCalibrator : public nvinfer1::IInt8EntropyCalibrator2 const uint64_t m_InputSize; const uint64_t m_InputCount; const std::string m_InputBlobName; - const std::string _s_net_type; + const std::string _s_net_type; const std::string m_CalibTableFilePath{nullptr}; uint32_t m_ImageIndex; bool m_ReadCache{true}; void* m_DeviceInput{nullptr}; std::vector m_ImageList; std::vector m_CalibrationCache; + + cv::Mat m_blob; }; -#endif \ No newline at end of file +#endif diff --git a/modules/chunk.cu b/modules/chunk.cu index 1eb497b..256815a 100644 --- a/modules/chunk.cu +++ b/modules/chunk.cu @@ -17,41 +17,34 @@ namespace nvinfer1 { - Chunk::Chunk() - { - - } Chunk::Chunk(const void* buffer, size_t size) { assert(size == sizeof(_n_size_split)); _n_size_split = *reinterpret_cast(buffer); - } - Chunk::~Chunk() - { + } - } - int Chunk::getNbOutputs() const + int Chunk::getNbOutputs() const noexcept { return 2; } - Dims Chunk::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) + Dims Chunk::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)noexcept { assert(nbInputDims == 1); assert(index == 0 || index == 1); return Dims3(inputs[0].d[0] / 2, inputs[0].d[1], inputs[0].d[2]); } - int Chunk::initialize() + int Chunk::initialize() noexcept { return 0; } - void Chunk::terminate() + void Chunk::terminate() noexcept { } - size_t Chunk::getWorkspaceSize(int maxBatchSize) const + size_t Chunk::getWorkspaceSize(int maxBatchSize) const noexcept { return 0; } @@ -60,7 +53,16 @@ namespace nvinfer1 const void* const* inputs, void** outputs, void* workspace, - cudaStream_t stream) + cudaStream_t stream)noexcept + { + return enqueue(batchSize, inputs, (void* const*)outputs, workspace, stream); + } + + int Chunk::enqueue(int batchSize, + const void* const* inputs, + void* const* outputs, + void* workspace, + cudaStream_t stream) noexcept { //batch for (int b = 0; b < batchSize; ++b) @@ -68,73 +70,104 @@ namespace nvinfer1 NV_CUDA_CHECK(cudaMemcpy((char*)outputs[0] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split, _n_size_split, cudaMemcpyDeviceToDevice)); NV_CUDA_CHECK(cudaMemcpy((char*)outputs[1] + b * _n_size_split, (char*)inputs[0] + b * 2 * _n_size_split + _n_size_split, _n_size_split, cudaMemcpyDeviceToDevice)); } - // NV_CUDA_CHECK(cudaMemcpy(outputs[0], inputs[0], _n_size_split, cudaMemcpyDeviceToDevice)); - // NV_CUDA_CHECK(cudaMemcpy(outputs[1], (void*)((char*)inputs[0] + _n_size_split), _n_size_split, cudaMemcpyDeviceToDevice)); return 0; } - size_t Chunk::getSerializationSize() const + size_t Chunk::getSerializationSize() const noexcept { return sizeof(_n_size_split); } - void Chunk::serialize(void *buffer)const + void Chunk::serialize(void *buffer)const noexcept { *reinterpret_cast(buffer) = _n_size_split; } - const char* Chunk::getPluginType()const + const char* Chunk::getPluginType()const noexcept { return "CHUNK_TRT"; } - const char* Chunk::getPluginVersion() const - { + const char* Chunk::getPluginVersion() const noexcept + { return "1.0"; } - void Chunk::destroy() + void Chunk::destroy() noexcept { delete this; } - void Chunk::setPluginNamespace(const char* pluginNamespace) + void Chunk::setPluginNamespace(const char* pluginNamespace) noexcept { _s_plugin_namespace = pluginNamespace; } - const char* Chunk::getPluginNamespace() const + const char* Chunk::getPluginNamespace() const noexcept { return _s_plugin_namespace.c_str(); } DataType Chunk::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, - int nbInputs) const + int nbInputs) const noexcept { assert(index == 0 || index == 1); return DataType::kFLOAT; } - bool Chunk::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const + bool Chunk::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept { return false; } - bool Chunk::canBroadcastInputAcrossBatch(int inputIndex) const + bool Chunk::canBroadcastInputAcrossBatch(int inputIndex) const noexcept { return false; } - void Chunk::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) {} + void Chunk::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) + { + } void Chunk::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) { _n_size_split = in->dims.d[0] / 2 * in->dims.d[1] * in->dims.d[2] *sizeof(float); } - void Chunk::detachFromContext() {} + void Chunk::detachFromContext() + { + } + + bool Chunk::supportsFormat(DataType type, PluginFormat format) const noexcept + { + return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); + } + + void Chunk::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept + { + size_t typeSize = sizeof(float); + switch (type) + { + case DataType::kFLOAT: + typeSize = sizeof(float); + break; + case DataType::kHALF: + typeSize = sizeof(float) / 2; + break; + case DataType::kINT8: + typeSize = 1; + break; + case DataType::kINT32: + typeSize = 4; + break; + case DataType::kBOOL: + typeSize = 1; + break; + } + _n_size_split = inputDims->d[0] / 2 * inputDims->d[1] * inputDims->d[2] * typeSize; + } // Clone the plugin - IPluginV2IOExt* Chunk::clone() const + IPluginV2* Chunk::clone() const noexcept { Chunk *p = new Chunk(); p->_n_size_split = _n_size_split; @@ -153,41 +186,41 @@ namespace nvinfer1 _fc.fields = _vec_plugin_attributes.data(); } - const char* ChunkPluginCreator::getPluginName() const + const char* ChunkPluginCreator::getPluginName() const noexcept { return "CHUNK_TRT"; } - const char* ChunkPluginCreator::getPluginVersion() const + const char* ChunkPluginCreator::getPluginVersion() const noexcept { return "1.0"; } - const PluginFieldCollection* ChunkPluginCreator::getFieldNames() + const PluginFieldCollection* ChunkPluginCreator::getFieldNames()noexcept { return &_fc; } - IPluginV2IOExt* ChunkPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) + IPluginV2* ChunkPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)noexcept { Chunk* obj = new Chunk(); obj->setPluginNamespace(_s_name_space.c_str()); return obj; } - IPluginV2IOExt* ChunkPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) + IPluginV2* ChunkPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept { Chunk* obj = new Chunk(serialData,serialLength); obj->setPluginNamespace(_s_name_space.c_str()); return obj; } - void ChunkPluginCreator::setPluginNamespace(const char* libNamespace) + void ChunkPluginCreator::setPluginNamespace(const char* libNamespace)noexcept { _s_name_space = libNamespace; } - const char* ChunkPluginCreator::getPluginNamespace() const + const char* ChunkPluginCreator::getPluginNamespace() const noexcept { return _s_name_space.c_str(); } diff --git a/modules/chunk.h b/modules/chunk.h index 1e940eb..a9ab605 100644 --- a/modules/chunk.h +++ b/modules/chunk.h @@ -18,40 +18,46 @@ namespace nvinfer1 { - class Chunk : public IPluginV2IOExt + class Chunk : public IPluginV2 { public: - Chunk(); + Chunk() = default; Chunk(const void* buffer, size_t length); - ~Chunk(); - int getNbOutputs()const override; - Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; - int initialize() override; - void terminate() override; - size_t getWorkspaceSize(int maxBatchSize) const override; - int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; - const char* getPluginType() const override; - const char* getPluginVersion() const override; - void destroy() override; - void setPluginNamespace(const char* pluginNamespace) override; - const char* getPluginNamespace() const override; - DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override; - bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override; - bool canBroadcastInputAcrossBatch(int inputIndex) const override; + ~Chunk() = default; + int getNbOutputs()const noexcept override; + Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims)noexcept override; + int initialize()noexcept override; + void terminate()noexcept override; + size_t getWorkspaceSize(int maxBatchSize) const noexcept override; + + int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept; + int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) noexcept; + + size_t getSerializationSize() const noexcept override; + void serialize(void* buffer) const noexcept override; + const char* getPluginType() const noexcept override; + const char* getPluginVersion() const noexcept override; + void destroy()noexcept override; + void setPluginNamespace(const char* pluginNamespace)noexcept override; + const char* getPluginNamespace() const noexcept override; + DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept; + bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept; + bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept; void attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override; - void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override; - void detachFromContext() override; - bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override + cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator); + void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput); + void detachFromContext(); + bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const { return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; } - IPluginV2IOExt* clone() const override; + IPluginV2* clone() const noexcept override; + bool supportsFormat(DataType type, PluginFormat format) const noexcept override; + void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override; + private: std::string _s_plugin_namespace; - int _n_size_split; + int _n_size_split = 0; }; class ChunkPluginCreator : public IPluginCreator @@ -59,13 +65,13 @@ namespace nvinfer1 public: ChunkPluginCreator(); ~ChunkPluginCreator() override = default; - const char* getPluginName()const override; - const char* getPluginVersion() const override; - const PluginFieldCollection* getFieldNames() override; - IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; - IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; - void setPluginNamespace(const char* libNamespace) override; - const char* getPluginNamespace() const override; + const char* getPluginName()const noexcept override; + const char* getPluginVersion() const noexcept override; + const PluginFieldCollection* getFieldNames()noexcept override; + IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc)noexcept override; + IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept override; + void setPluginNamespace(const char* libNamespace)noexcept override; + const char* getPluginNamespace() const noexcept override; private: std::string _s_name_space; static PluginFieldCollection _fc; diff --git a/modules/class_detector.cpp b/modules/class_detector.cpp index e15fa78..8adbd94 100644 --- a/modules/class_detector.cpp +++ b/modules/class_detector.cpp @@ -1,36 +1,40 @@ #include "class_detector.h" #include "class_yolo_detector.hpp" -class Detector::Impl -{ -public: - Impl() {} + class Detector::Impl + { + public: + Impl() = default; + ~Impl() = default; - ~Impl(){} + YoloDectector _detector; + }; - YoloDectector _detector; -}; + Detector::Detector() + { + _impl = new Impl(); + } -Detector::Detector() -{ - _impl = new Impl(); -} + Detector::~Detector() + { + if (_impl) + { + delete _impl; + _impl = nullptr; + } + } -Detector::~Detector() -{ - if (_impl) + void Detector::init(const Config &config) { - delete _impl; - _impl = nullptr; + _impl->_detector.init(config); } -} -void Detector::init(const Config &config) -{ - _impl->_detector.init(config); -} + void Detector::detect(const std::vector &mat_image, std::vector &vec_batch_result) + { + _impl->_detector.detect(mat_image, vec_batch_result); + } -void Detector::detect(const std::vector &mat_image, std::vector &vec_batch_result) -{ - _impl->_detector.detect(mat_image, vec_batch_result); -} + cv::Size Detector::get_input_size() const + { + return _impl->_detector.get_input_size(); + } diff --git a/modules/class_detector.h b/modules/class_detector.h index 74d68a0..d6b5377 100644 --- a/modules/class_detector.h +++ b/modules/class_detector.h @@ -5,68 +5,73 @@ #include #include -struct Result -{ - int id = -1; - float prob = 0.f; - cv::Rect rect; -}; - -using BatchResult = std::vector; - -enum ModelType -{ - YOLOV2 = 0, - YOLOV3, - YOLOV2_TINY, - YOLOV3_TINY, - YOLOV4, - YOLOV4_TINY, - YOLOV5 -}; - -enum Precision -{ - INT8 = 0, - FP16, - FP32 -}; - -struct Config -{ - std::string file_model_cfg = "configs/yolov3.cfg"; - - std::string file_model_weights = "configs/yolov3.weights"; - - float detect_thresh = 0.9; - - ModelType net_type = YOLOV3; - - Precision inference_precison = FP32; + struct Result + { + int id = -1; + float prob = 0.f; + cv::Rect rect; + + Result(int id_, float prob_, cv::Rect r) + : id(id_), prob(prob_), rect(r) + { + } + }; - int gpu_id = 0; + using BatchResult = std::vector; - std::string calibration_image_list_file_txt = "configs/calibration_images.txt"; + enum ModelType + { + YOLOV3, + YOLOV4, + YOLOV4_TINY, + YOLOV5 + }; -}; + enum Precision + { + INT8 = 0, + FP16, + FP32 + }; -class API Detector -{ -public: - explicit Detector(); + struct Config + { + std::string file_model_cfg = "yolov4.cfg"; - ~Detector(); + std::string file_model_weights = "yolov4.weights"; - void init(const Config &config); + float detect_thresh = 0.5f; - void detect(const std::vector &mat_image, std::vector &vec_batch_result); + ModelType net_type = YOLOV4; -private: - - Detector(const Detector &); - const Detector &operator =(const Detector &); - class Impl; - Impl *_impl; -}; + Precision inference_precison = FP32; + + int gpu_id = 0; + + uint32_t batch_size = 1; + + std::string calibration_image_list_file_txt = "configs/calibration_images.txt"; + }; + + class API Detector + { + public: + explicit Detector(); + + ~Detector(); + + void init(const Config &config); + + void detect(const std::vector &mat_image, std::vector &vec_batch_result); + + cv::Size get_input_size() const; + + private: + + Detector(const Detector &); + const Detector &operator =(const Detector &); + class Impl; + Impl *_impl = nullptr; + }; -#endif // !CLASS_QH_DETECTOR_H_ \ No newline at end of file +#endif // !CLASS_QH_DETECTOR_H_ diff --git a/modules/class_yolo_detector.hpp b/modules/class_yolo_detector.hpp index a70c57e..f738247 100644 --- a/modules/class_yolo_detector.hpp +++ b/modules/class_yolo_detector.hpp @@ -10,7 +10,6 @@ #include "yolov4.h" #include "yolov5.h" -#include #include #include #include @@ -21,14 +20,8 @@ class YoloDectector { public: - YoloDectector() - { - - } - ~YoloDectector() - { - - } + YoloDectector() = default; + ~YoloDectector() = default; void init(const Config &config) { @@ -44,46 +37,42 @@ class YoloDectector void detect(const std::vector &vec_image, std::vector &vec_batch_result) { - Timer timer; std::vector vec_ds_images; vec_batch_result.clear(); - vec_batch_result.resize(vec_image.size()); + if (vec_batch_result.capacity() < vec_image.size()) + vec_batch_result.reserve(vec_image.size()); for (const auto &img:vec_image) { vec_ds_images.emplace_back(img, _vec_net_type[_config.net_type], _p_net->getInputH(), _p_net->getInputW()); } - cv::Mat trtInput = blobFromDsImages(vec_ds_images, _p_net->getInputH(),_p_net->getInputW()); - timer.out("pre"); - _p_net->doInference(trtInput.data, vec_ds_images.size()); - timer.reset(); - for (uint32_t i = 0; i < vec_ds_images.size(); ++i) + blobFromDsImages(vec_ds_images, m_blob, _p_net->getInputH(),_p_net->getInputW()); + _p_net->doInference(m_blob.data, static_cast(vec_ds_images.size())); + for (size_t i = 0; i < vec_ds_images.size(); ++i) { auto curImage = vec_ds_images.at(i); - auto binfo = _p_net->decodeDetections(i, curImage.getImageHeight(), curImage.getImageWidth()); - auto remaining = nmsAllClasses(_p_net->getNMSThresh(), - binfo, - _p_net->getNumClasses(), - _vec_net_type[_config.net_type]); - if (remaining.empty()) - { - continue; - } - std::vector vec_result(0); - for (const auto &b : remaining) + auto binfo = _p_net->decodeDetections(static_cast(i), curImage.getImageHeight(), curImage.getImageWidth()); + auto remaining = (_p_net->getNMSThresh() > 0) ? nmsAllClasses(_p_net->getNMSThresh(), binfo, _p_net->getNumClasses(), _vec_net_type[_config.net_type]) : binfo; + + std::vector vec_result; + if (!remaining.empty()) { - Result res; - res.id = b.label; - res.prob = b.prob; - const int x = b.box.x1; - const int y = b.box.y1; - const int w = b.box.x2 - b.box.x1; - const int h = b.box.y2 - b.box.y1; - res.rect = cv::Rect(x, y, w, h); - vec_result.push_back(res); + vec_result.reserve(remaining.size()); + for (const auto &b : remaining) + { + const int x = cvRound(b.box.x1); + const int y = cvRound(b.box.y1); + const int w = cvRound(b.box.x2 - b.box.x1); + const int h = cvRound(b.box.y2 - b.box.y1); + vec_result.emplace_back(b.label, b.prob, cv::Rect(x, y, w, h)); + } } - vec_batch_result[i] = vec_result; + vec_batch_result.emplace_back(vec_result); } - timer.out("post"); + } + + cv::Size get_input_size() const + { + return cv::Size(_p_net->getInputW(), _p_net->getInputH()); } private: @@ -118,19 +107,16 @@ class YoloDectector _infer_param.calibImagesPath = ""; _infer_param.probThresh = _config.detect_thresh; _infer_param.nmsThresh = 0.5; + _infer_param.batchSize = _config.batch_size; } void build_net() { - if ((_config.net_type == YOLOV2) || (_config.net_type == YOLOV2_TINY)) - { - _p_net = std::unique_ptr{ new YoloV2( _yolo_info, _infer_param) }; - } - else if ((_config.net_type == YOLOV3) || (_config.net_type == YOLOV3_TINY)) + if (_config.net_type == YOLOV3) { _p_net = std::unique_ptr{ new YoloV3(_yolo_info, _infer_param) }; } - else if( (_config.net_type == YOLOV4) || (_config.net_type == YOLOV4_TINY)) + else if (_config.net_type == YOLOV4 || _config.net_type == YOLOV4_TINY) { _p_net = std::unique_ptr{ new YoloV4(_yolo_info,_infer_param) }; } @@ -148,10 +134,11 @@ class YoloDectector Config _config; NetworkInfo _yolo_info; InferParams _infer_param; - std::vector _vec_net_type{ "yolov2","yolov3","yolov2-tiny","yolov3-tiny","yolov4","yolov4-tiny","yolov5" }; + std::vector _vec_net_type{ "yolov3", "yolov4", "yolov4-tiny", "yolov5" }; std::vector _vec_precision{ "kINT8","kHALF","kFLOAT" }; std::unique_ptr _p_net = nullptr; Timer _m_timer; + cv::Mat m_blob; }; diff --git a/modules/detect.cu b/modules/detect.cu index 4d8f20e..22a628e 100644 --- a/modules/detect.cu +++ b/modules/detect.cu @@ -122,20 +122,42 @@ namespace nvinfer1 return cudaGetLastError(); } - int Detect::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, - cudaStream_t stream) + int Detect::enqueue(int batchSize, + const void* const* inputs, + void* const* outputs, + void* workspace, + cudaStream_t stream) noexcept { NV_CUDA_CHECK(cuda_detect_layer(inputs[0], outputs[0], batchSize, _n_grid_h, _n_grid_w, _n_classes, _n_anchor, _n_output_size, stream)); return 0; } - size_t Detect::getSerializationSize() const + int Detect::enqueue(int batchSize, + const void* const* inputs, + void** outputs, + void* workspace, + cudaStream_t stream) noexcept + { + return enqueue(batchSize, inputs, (void* const*)outputs, workspace, stream); + } + + bool Detect::supportsFormat(DataType type, PluginFormat format) const noexcept + { + return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); + } + + void Detect::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept + { + + } + + size_t Detect::getSerializationSize() const noexcept { return sizeof(_n_anchor) + sizeof(_n_classes) + sizeof(_n_grid_h) + sizeof(_n_grid_w) + sizeof(_n_output_size); } - void Detect::serialize(void *buffer) const + void Detect::serialize(void *buffer) const noexcept { char *d = static_cast(buffer), *a = d; write(d,_n_anchor); @@ -150,7 +172,7 @@ namespace nvinfer1 { } - IPluginV2IOExt* Detect::clone() const + IPluginV2* Detect::clone() const noexcept { Detect *p = new Detect(_n_anchor,_n_classes,_n_grid_h,_n_grid_w); p->setPluginNamespace(_s_plugin_namespace.c_str()); @@ -169,41 +191,41 @@ namespace nvinfer1 _fc.fields = _vec_plugin_attributes.data(); } - const char* DetectPluginCreator::getPluginName() const + const char* DetectPluginCreator::getPluginName() const noexcept { return "DETECT_TRT"; } - const char* DetectPluginCreator::getPluginVersion() const + const char* DetectPluginCreator::getPluginVersion() const noexcept { return "1.0"; } - const PluginFieldCollection* DetectPluginCreator::getFieldNames() + const PluginFieldCollection* DetectPluginCreator::getFieldNames() noexcept { return &_fc; } - IPluginV2IOExt* DetectPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) + IPluginV2* DetectPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) noexcept { Detect* obj = new Detect(); obj->setPluginNamespace(_s_name_space.c_str()); return obj; } - IPluginV2IOExt* DetectPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) + IPluginV2* DetectPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept { Detect* obj = new Detect(serialData, serialLength); obj->setPluginNamespace(_s_name_space.c_str()); return obj; } - void DetectPluginCreator::setPluginNamespace(const char* libNamespace) + void DetectPluginCreator::setPluginNamespace(const char* libNamespace) noexcept { _s_name_space = libNamespace; } - const char* DetectPluginCreator::getPluginNamespace() const + const char* DetectPluginCreator::getPluginNamespace() const noexcept { return _s_name_space.c_str(); } diff --git a/modules/detect.h b/modules/detect.h index 7eff6c0..7743264 100644 --- a/modules/detect.h +++ b/modules/detect.h @@ -21,7 +21,7 @@ namespace nvinfer1 buffer += sizeof(T); } - class Detect :public IPluginV2IOExt + class Detect :public IPluginV2 { public: Detect(); @@ -30,71 +30,76 @@ namespace nvinfer1 const uint32_t n_grid_h_, const uint32_t n_grid_w_/*, const uint32_t &n_stride_h_, const uint32_t &n_stride_w_*/); ~Detect(); - int getNbOutputs()const override + int getNbOutputs()const noexcept override { return 1; } - Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override + Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) noexcept override { return inputs[0]; } - int initialize() override + int initialize() noexcept override { return 0; } - void terminate() override + void terminate() noexcept override { } - size_t getWorkspaceSize(int maxBatchSize) const override + size_t getWorkspaceSize(int maxBatchSize) const noexcept override { return 0; } - int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; - const char* getPluginType() const override + int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) noexcept; + int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept; + + bool supportsFormat(DataType type, PluginFormat format) const noexcept override; + void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override; + + size_t getSerializationSize() const noexcept override; + void serialize(void* buffer) const noexcept override; + const char* getPluginType() const noexcept override { return "DETECT_TRT"; } - const char* getPluginVersion() const override + const char* getPluginVersion() const noexcept override { return "1.0"; } - void destroy() override + void destroy() noexcept override { delete this; } - void setPluginNamespace(const char* pluginNamespace) override + void setPluginNamespace(const char* pluginNamespace) noexcept override { _s_plugin_namespace = pluginNamespace; } - const char* getPluginNamespace() const override + const char* getPluginNamespace() const noexcept override { return _s_plugin_namespace.c_str(); } - DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override + DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept { return DataType::kFLOAT; } - bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override + bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept { return false; } - bool canBroadcastInputAcrossBatch(int inputIndex) const override + bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept { return false; } void attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override + cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) {} - void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override; - void detachFromContext() override + void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) ; + void detachFromContext() {} - bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override + bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const noexcept { return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; } - IPluginV2IOExt* clone() const override; + IPluginV2* clone() const noexcept override; private: uint32_t _n_anchor; @@ -112,13 +117,13 @@ namespace nvinfer1 public: DetectPluginCreator(); ~DetectPluginCreator() override = default; - const char* getPluginName()const override; - const char* getPluginVersion() const override; - const PluginFieldCollection* getFieldNames() override; - IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; - IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; - void setPluginNamespace(const char* libNamespace) override; - const char* getPluginNamespace() const override; + const char* getPluginName()const noexcept override; + const char* getPluginVersion() const noexcept override; + const PluginFieldCollection* getFieldNames() noexcept override; + IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) noexcept override; + IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override; + void setPluginNamespace(const char* libNamespace) noexcept override; + const char* getPluginNamespace() const noexcept override; private: std::string _s_name_space; static PluginFieldCollection _fc; diff --git a/modules/ds_image.cpp b/modules/ds_image.cpp index 6441a44..e3e6e1e 100644 --- a/modules/ds_image.cpp +++ b/modules/ds_image.cpp @@ -23,27 +23,16 @@ SOFTWARE. * */ #include "ds_image.h" -#include -DsImage::DsImage() : - m_Height(0), - m_Width(0), - m_XOffset(0), - m_YOffset(0), - m_ScalingFactor(0.0), - m_RNG(cv::RNG(unsigned(std::time(0)))), - m_ImageName() -{ -} +#ifdef HAVE_FILESYSTEM +#include +namespace fs = std::filesystem; +#else +#include +namespace fs = std::experimental::filesystem; +#endif -DsImage::DsImage(const cv::Mat& mat_image_, const std::string &s_net_type_, const int& inputH, const int& inputW) : - m_Height(0), - m_Width(0), - m_XOffset(0), - m_YOffset(0), - m_ScalingFactor(0.0), - m_RNG(cv::RNG(unsigned(std::time(0)))), - m_ImageName() +DsImage::DsImage(const cv::Mat& mat_image_, const std::string &s_net_type_, const int& inputH, const int& inputW) { m_OrigImage = mat_image_; m_Height = m_OrigImage.rows; @@ -92,18 +81,11 @@ DsImage::DsImage(const cv::Mat& mat_image_, const std::string &s_net_type_, cons // converting to RGB //cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB); } - } -DsImage::DsImage(const std::string& path, const std::string &s_net_type_, const int& inputH, const int& inputW) : - m_Height(0), - m_Width(0), - m_XOffset(0), - m_YOffset(0), - m_ScalingFactor(0.0), - m_RNG(cv::RNG(unsigned(std::time(0)))), - m_ImageName() + +DsImage::DsImage(const std::string& path, const std::string &s_net_type_, const int& inputH, const int& inputW) { - m_ImageName = std::experimental::filesystem::path(path).stem().string(); + m_ImageName = fs::path(path).stem().string(); m_OrigImage = cv::imread(path, cv::IMREAD_UNCHANGED); m_Height = m_OrigImage.rows; m_Width = m_OrigImage.cols; @@ -183,23 +165,20 @@ void DsImage::letterbox(const int& inputH, const int& inputW) assert(2 * m_YOffset + resizeH == inputH); // resizing - cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(resizeW, resizeH), 0, 0, cv::INTER_CUBIC); + cv::resize(m_OrigImage, m_LetterboxImage, cv::Size(resizeW, resizeH), 0, 0, cv::INTER_LINEAR); // letterboxing cv::copyMakeBorder(m_LetterboxImage, m_LetterboxImage, m_YOffset, m_YOffset, m_XOffset, m_XOffset, cv::BORDER_CONSTANT, cv::Scalar(128, 128, 128)); // cv::imwrite("letter.jpg", m_LetterboxImage); - // converting to RGB - cv::cvtColor(m_LetterboxImage, m_LetterboxImage, cv::COLOR_BGR2RGB); } - void DsImage::addBBox(BBoxInfo box, const std::string& labelName) { m_Bboxes.push_back(box); - const int x = box.box.x1; - const int y = box.box.y1; - const int w = box.box.x2 - box.box.x1; - const int h = box.box.y2 - box.box.y1; + const int x = cvRound(box.box.x1); + const int y = cvRound(box.box.y1); + const int w = cvRound(box.box.x2 - box.box.x1); + const int h = cvRound(box.box.y2 - box.box.y1); const cv::Scalar color = cv::Scalar(m_RNG.uniform(0, 255), m_RNG.uniform(0, 255), m_RNG.uniform(0, 255)); @@ -244,4 +223,4 @@ std::string DsImage::exportJson() const json << "}"; } return json.str(); -} \ No newline at end of file +} diff --git a/modules/ds_image.h b/modules/ds_image.h index e2a3c87..5894585 100644 --- a/modules/ds_image.h +++ b/modules/ds_image.h @@ -32,12 +32,12 @@ struct BBoxInfo; class DsImage { public: - DsImage(); + DsImage() = default; DsImage(const std::string& path, const std::string &s_net_type_, const int& inputH, const int& inputW); DsImage(const cv::Mat& mat_image_, const std::string &s_net_type_, const int& inputH, const int& inputW); int getImageHeight() const { return m_Height; } int getImageWidth() const { return m_Width; } - cv::Mat getLetterBoxedImage() const { return m_LetterboxImage; } + const cv::Mat& getLetterBoxedImage() const { return m_LetterboxImage; } cv::Mat getOriginalImage() const { return m_OrigImage; } std::string getImageName() const { return m_ImageName; } void addBBox(BBoxInfo box, const std::string& labelName); @@ -45,14 +45,15 @@ class DsImage void saveImageJPEG(const std::string& dirPath) const; std::string exportJson() const; void letterbox(const int& inputH, const int& inputW); + private: - int m_Height; - int m_Width; - int m_XOffset; - int m_YOffset; - float m_ScalingFactor; + int m_Height = 0; + int m_Width = 0; + int m_XOffset = 0; + int m_YOffset = 0; + float m_ScalingFactor = 0.0f; std::string m_ImagePath; - cv::RNG m_RNG; + cv::RNG m_RNG { cv::RNG(unsigned(std::time(0))) }; std::string m_ImageName; std::vector m_Bboxes; diff --git a/modules/hardswish.cu b/modules/hardswish.cu index fe5dc31..5456131 100644 --- a/modules/hardswish.cu +++ b/modules/hardswish.cu @@ -27,7 +27,6 @@ namespace nvinfer1 cudaDeviceProp prop; cudaGetDeviceProperties(&prop, 0); _n_max_thread_pre_block = prop.maxThreadsPerBlock; - // printf("Hardswish():%d\n", _n_max_thread_pre_block); } Hardswish::Hardswish(const void* data, size_t length) @@ -35,7 +34,6 @@ namespace nvinfer1 const char *d = reinterpret_cast(data), *a = d; r(d, _n_max_thread_pre_block); r(d, _n_output_size); -// printf("r:threads:%d,size:%d\n", _n_max_thread_pre_block, _n_output_size); assert(d == a + length); } @@ -68,7 +66,6 @@ namespace nvinfer1 cudaStream_t stream_) { int n_data_size = n_batch_size_ * n_output_size_; -// printf("cuda_hardswish_layer:%d,size:%d\n", n_batch_size_, n_output_size_); kernel_hardswish << <(n_data_size + threads_ -1)/threads_, threads_ >> >( reinterpret_cast(input_), reinterpret_cast(output_), @@ -76,41 +73,64 @@ namespace nvinfer1 return cudaGetLastError(); } - int Hardswish::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, - cudaStream_t stream) + int Hardswish::enqueue(int batchSize, + const void* const* inputs, + void* const* outputs, + void* workspace, + cudaStream_t stream) noexcept { -// printf("batch_size:%d,output_size:%d,threads:%d\n", batchSize, _n_output_size, _n_max_thread_pre_block); + //printf("batch_size:%d,output_size:%d,threads:%d\n", batchSize, _n_output_size, _n_max_thread_pre_block); NV_CUDA_CHECK(cuda_hardswish_layer(inputs[0], outputs[0], batchSize, _n_output_size , _n_max_thread_pre_block,stream)); return 0; } - size_t Hardswish::getSerializationSize() const + int Hardswish::enqueue(int batchSize, + const void* const* inputs, + void** outputs, + void* workspace, + cudaStream_t stream) noexcept + { + return enqueue(batchSize, inputs, (void* const*)outputs, workspace, stream); + } + + size_t Hardswish::getSerializationSize() const noexcept { return sizeof(_n_max_thread_pre_block) +sizeof(_n_output_size); } - void Hardswish::serialize(void *buffer) const + void Hardswish::serialize(void *buffer) const noexcept { char *d = static_cast(buffer), *a = d; w(d, _n_max_thread_pre_block); w(d, _n_output_size); -// printf("serialize:%d,%d\n", _n_max_thread_pre_block, _n_output_size); assert(d == a + getSerializationSize()); } - void Hardswish::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) + + bool Hardswish::supportsFormat(DataType type, PluginFormat format) const noexcept + { + return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); + } + + void Hardswish::configureWithFormat(const Dims* inputDims, int nbInputs, + const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept + { + + } + + + void Hardswish::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) noexcept { _n_output_size = in->dims.d[0] * in->dims.d[1] * in->dims.d[2]; -// printf("configurePlugin:%d,%d,%d\n", in->dims.d[0], in->dims.d[1], in->dims.d[2]); + // printf("output_size:%d,threads:%d\n", _n_output_size, _n_max_thread_pre_block); } - IPluginV2IOExt* Hardswish::clone() const + IPluginV2* Hardswish::clone() const noexcept { Hardswish *p = new Hardswish(); p->setPluginNamespace(_s_plugin_namespace.c_str()); p->_n_max_thread_pre_block = _n_max_thread_pre_block; p->_n_output_size = _n_output_size; -// printf("clone:%d,%d\n", _n_max_thread_pre_block, _n_output_size); return p; } @@ -126,41 +146,41 @@ namespace nvinfer1 _fc.fields = _vec_plugin_attributes.data(); } - const char* HardswishPluginCreator::getPluginName() const + const char* HardswishPluginCreator::getPluginName() const noexcept { return "HARDSWISH_TRT"; } - const char* HardswishPluginCreator::getPluginVersion() const + const char* HardswishPluginCreator::getPluginVersion() const noexcept { return "1.0"; } - const PluginFieldCollection* HardswishPluginCreator::getFieldNames() + const PluginFieldCollection* HardswishPluginCreator::getFieldNames() noexcept { return &_fc; } - IPluginV2IOExt* HardswishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) + IPluginV2* HardswishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) noexcept { Hardswish* obj = new Hardswish(); obj->setPluginNamespace(_s_name_space.c_str()); return obj; } - IPluginV2IOExt* HardswishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) + IPluginV2* HardswishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept { Hardswish* obj = new Hardswish(serialData, serialLength); obj->setPluginNamespace(_s_name_space.c_str()); return obj; } - void HardswishPluginCreator::setPluginNamespace(const char* libNamespace) + void HardswishPluginCreator::setPluginNamespace(const char* libNamespace) noexcept { _s_name_space = libNamespace; } - const char* HardswishPluginCreator::getPluginNamespace() const + const char* HardswishPluginCreator::getPluginNamespace() const noexcept { return _s_name_space.c_str(); } diff --git a/modules/hardswish.h b/modules/hardswish.h index 4cd824e..5cc96d3 100644 --- a/modules/hardswish.h +++ b/modules/hardswish.h @@ -1,6 +1,6 @@ #ifndef _HARDSWISH_H_ -#define _HARDSWISH_H_ +#define _HARDSWISH_H_ #include #include @@ -22,77 +22,83 @@ namespace nvinfer1 buffer += sizeof(T); } - class Hardswish :public IPluginV2IOExt + class Hardswish :public IPluginV2 { public: Hardswish(); Hardswish(const void* data, size_t length); ~Hardswish(); - int getNbOutputs()const override + int getNbOutputs()const noexcept override { return 1; } - Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override + Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) noexcept override { return inputs[0]; } - int initialize() override + int initialize() noexcept override { return 0; } - void terminate() override + void terminate() noexcept override { } - size_t getWorkspaceSize(int maxBatchSize) const override + size_t getWorkspaceSize(int maxBatchSize) const noexcept override { return 0; } - int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)override; - size_t getSerializationSize() const override; - void serialize(void* buffer) const override; - const char* getPluginType() const override + + bool supportsFormat(DataType type, PluginFormat format) const noexcept override; + void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override; + + int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) noexcept; + int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept; + + size_t getSerializationSize() const noexcept override; + void serialize(void* buffer) const noexcept override; + const char* getPluginType() const noexcept override { return "HARDSWISH_TRT"; } - const char* getPluginVersion() const override + const char* getPluginVersion() const noexcept override { return "1.0"; } - void destroy() override + void destroy() noexcept override { delete this; } - void setPluginNamespace(const char* pluginNamespace) override + void setPluginNamespace(const char* pluginNamespace) noexcept override { _s_plugin_namespace = pluginNamespace; } - const char* getPluginNamespace() const override + const char* getPluginNamespace() const noexcept override { return _s_plugin_namespace.c_str(); } - DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override + DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept { return DataType::kFLOAT; } - bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override + bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept { return false; } - bool canBroadcastInputAcrossBatch(int inputIndex) const override + bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept { return false; } void attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override + cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) noexcept {} - void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override; - void detachFromContext() override + void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) noexcept; + void detachFromContext() noexcept {} - bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override + bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const noexcept { return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; } - IPluginV2IOExt* clone() const override; + IPluginV2* clone() const noexcept override; private: uint32_t _n_max_thread_pre_block; @@ -105,13 +111,13 @@ namespace nvinfer1 public: HardswishPluginCreator(); ~HardswishPluginCreator() override = default; - const char* getPluginName()const override; - const char* getPluginVersion() const override; - const PluginFieldCollection* getFieldNames() override; - IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; - IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; - void setPluginNamespace(const char* libNamespace) override; - const char* getPluginNamespace() const override; + const char* getPluginName()const noexcept override; + const char* getPluginVersion() const noexcept override; + const PluginFieldCollection* getFieldNames() noexcept override; + IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) noexcept override; + IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override; + void setPluginNamespace(const char* libNamespace) noexcept override; + const char* getPluginNamespace() const noexcept override; private: std::string _s_name_space; static PluginFieldCollection _fc; @@ -122,4 +128,4 @@ namespace nvinfer1 -#endif \ No newline at end of file +#endif diff --git a/modules/mish.cu b/modules/mish.cu index d05f609..6ad4242 100644 --- a/modules/mish.cu +++ b/modules/mish.cu @@ -21,22 +21,33 @@ namespace nvinfer1 input_size_ = *reinterpret_cast(data); } - void MishPlugin::serialize(void* buffer) const + void MishPlugin::serialize(void* buffer) const noexcept { *reinterpret_cast(buffer) = input_size_; } - size_t MishPlugin::getSerializationSize() const + size_t MishPlugin::getSerializationSize() const noexcept { return sizeof(input_size_); } - int MishPlugin::initialize() + int MishPlugin::initialize()noexcept { return 0; } - Dims MishPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims) + bool MishPlugin::supportsFormat(DataType type, PluginFormat format) const noexcept + { + return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); + } + + void MishPlugin::configureWithFormat(const Dims* inputDims, int nbInputs, + const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept + { + + } + + Dims MishPlugin::getOutputDimensions(int index, const Dims* inputs, int nbInputDims)noexcept { assert(nbInputDims == 1); assert(index == 0); @@ -46,63 +57,63 @@ namespace nvinfer1 } // Set plugin namespace - void MishPlugin::setPluginNamespace(const char* pluginNamespace) + void MishPlugin::setPluginNamespace(const char* pluginNamespace)noexcept { mPluginNamespace = pluginNamespace; } - const char* MishPlugin::getPluginNamespace() const + const char* MishPlugin::getPluginNamespace() const noexcept { return mPluginNamespace; } // Return the DataType of the plugin output at the requested index - DataType MishPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const + DataType MishPlugin::getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept { return DataType::kFLOAT; } // Return true if output tensor is broadcast across a batch. - bool MishPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const + bool MishPlugin::isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept { return false; } // Return true if plugin can use input that is broadcast across batch without replication. - bool MishPlugin::canBroadcastInputAcrossBatch(int inputIndex) const + bool MishPlugin::canBroadcastInputAcrossBatch(int inputIndex) const noexcept { return false; } - void MishPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) + void MishPlugin::configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)noexcept { } // Attach the plugin object to an execution context and grant the plugin the access to some context resource. - void MishPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) + void MishPlugin::attachToContext(cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)noexcept { } // Detach the plugin object from its execution context. - void MishPlugin::detachFromContext() {} + void MishPlugin::detachFromContext()noexcept {} - const char* MishPlugin::getPluginType() const + const char* MishPlugin::getPluginType() const noexcept { return "Mish_TRT"; } - const char* MishPlugin::getPluginVersion() const + const char* MishPlugin::getPluginVersion() const noexcept { return "1"; } - void MishPlugin::destroy() + void MishPlugin::destroy()noexcept { delete this; } // Clone the plugin - IPluginV2IOExt* MishPlugin::clone() const + IPluginV2* MishPlugin::clone() const noexcept { MishPlugin *p = new MishPlugin(); p->input_size_ = input_size_; @@ -112,13 +123,15 @@ namespace nvinfer1 __device__ float tanh_activate_kernel(float x){return (2/(1 + expf(-2*x)) - 1);} - __device__ float softplus_kernel(float x, float threshold = 20) { + __device__ float softplus_kernel(float x, float threshold = 20) + { if (x > threshold) return x; // too large else if (x < -threshold) return expf(x); // too small return logf(expf(x) + 1); } - __global__ void mish_kernel(const float *input, float *output, int num_elem) { + __global__ void mish_kernel(const float *input, float *output, int num_elem) + { int idx = threadIdx.x + blockDim.x * blockIdx.x; if (idx >= num_elem) return; @@ -135,14 +148,19 @@ namespace nvinfer1 output[idx] = input[idx] * tanh_activate_kernel(softplus_kernel(input[idx])); } - void MishPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize) { + void MishPlugin::forwardGpu(const float *const * inputs, float* output, cudaStream_t stream, int batchSize) + { int block_size = thread_count_; int grid_size = (input_size_ * batchSize + block_size - 1) / block_size; mish_kernel<<>>(inputs[0], output, input_size_ * batchSize); } - int MishPlugin::enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) - { + int MishPlugin::enqueue(int batchSize, + const void* const* inputs, + void* const* outputs, + void* workspace, + cudaStream_t stream) noexcept + { //assert(batchSize == 1); //GPU //CUDA_CHECK(cudaStreamSynchronize(stream)); @@ -150,6 +168,15 @@ namespace nvinfer1 return 0; } + int MishPlugin::enqueue(int batchSize, + const void* const* inputs, + void** outputs, + void* workspace, + cudaStream_t stream) noexcept + { + return enqueue(batchSize, inputs, (void* const*)outputs, workspace, stream); + } + PluginFieldCollection MishPluginCreator::mFC{}; std::vector MishPluginCreator::mPluginAttributes; @@ -161,29 +188,29 @@ namespace nvinfer1 mFC.fields = mPluginAttributes.data(); } - const char* MishPluginCreator::getPluginName() const + const char* MishPluginCreator::getPluginName() const noexcept { return "Mish_TRT"; } - const char* MishPluginCreator::getPluginVersion() const + const char* MishPluginCreator::getPluginVersion() const noexcept { return "1"; } - const PluginFieldCollection* MishPluginCreator::getFieldNames() + const PluginFieldCollection* MishPluginCreator::getFieldNames()noexcept { return &mFC; } - IPluginV2IOExt* MishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc) + IPluginV2* MishPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)noexcept { MishPlugin* obj = new MishPlugin(); obj->setPluginNamespace(mNamespace.c_str()); return obj; } - IPluginV2IOExt* MishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength) + IPluginV2* MishPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept { // This object will be deleted when the network is destroyed, which will // call MishPlugin::destroy() @@ -192,5 +219,17 @@ namespace nvinfer1 return obj; } + + void MishPluginCreator::setPluginNamespace(const char* libNamespace)noexcept + { + mNamespace = libNamespace; + } + + const char* MishPluginCreator::getPluginNamespace() const noexcept + { + return mNamespace.c_str(); + } + + } diff --git a/modules/mish.h b/modules/mish.h index bae6f1f..7581a97 100644 --- a/modules/mish.h +++ b/modules/mish.h @@ -9,7 +9,7 @@ //https://github.com/wang-xinyu/tensorrtx namespace nvinfer1 { - class MishPlugin: public IPluginV2IOExt + class MishPlugin: public IPluginV2 { public: explicit MishPlugin(); @@ -17,53 +17,57 @@ namespace nvinfer1 ~MishPlugin(); - int getNbOutputs() const override + int getNbOutputs() const noexcept override { return 1; } - Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override; + Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) noexcept override; - int initialize() override; + int initialize() noexcept override; - virtual void terminate() override {} + virtual void terminate() noexcept override {} - virtual size_t getWorkspaceSize(int maxBatchSize) const override { return 0;} + virtual size_t getWorkspaceSize(int maxBatchSize) const noexcept override { return 0;} - virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override; + // virtual int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream); + int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) noexcept; + int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept; + bool supportsFormat(DataType type, PluginFormat format) const noexcept override; + void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override; - virtual size_t getSerializationSize() const override; + virtual size_t getSerializationSize() const noexcept override; - virtual void serialize(void* buffer) const override; + virtual void serialize(void* buffer) const noexcept override; - bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const override { + bool supportsFormatCombination(int pos, const PluginTensorDesc* inOut, int nbInputs, int nbOutputs) const noexcept { return inOut[pos].format == TensorFormat::kLINEAR && inOut[pos].type == DataType::kFLOAT; } - const char* getPluginType() const override; + const char* getPluginType() const noexcept override; - const char* getPluginVersion() const override; + const char* getPluginVersion() const noexcept override; - void destroy() override; + void destroy() noexcept override; - IPluginV2IOExt* clone() const override; + IPluginV2* clone() const noexcept override; - void setPluginNamespace(const char* pluginNamespace) override; + void setPluginNamespace(const char* pluginNamespace) noexcept override; - const char* getPluginNamespace() const override; + const char* getPluginNamespace() const noexcept override; - DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const override; + DataType getOutputDataType(int index, const nvinfer1::DataType* inputTypes, int nbInputs) const noexcept; - bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const override; + bool isOutputBroadcastAcrossBatch(int outputIndex, const bool* inputIsBroadcasted, int nbInputs) const noexcept; - bool canBroadcastInputAcrossBatch(int inputIndex) const override; + bool canBroadcastInputAcrossBatch(int inputIndex) const noexcept; void attachToContext( - cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator) override; + cudnnContext* cudnnContext, cublasContext* cublasContext, IGpuAllocator* gpuAllocator)noexcept; - void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput) override; + void configurePlugin(const PluginTensorDesc* in, int nbInput, const PluginTensorDesc* out, int nbOutput)noexcept; - void detachFromContext() override; + void detachFromContext()noexcept; int input_size_; private: @@ -79,25 +83,19 @@ namespace nvinfer1 ~MishPluginCreator() override = default; - const char* getPluginName() const override; + const char* getPluginName() const noexcept override; - const char* getPluginVersion() const override; + const char* getPluginVersion() const noexcept override; - const PluginFieldCollection* getFieldNames() override; + const PluginFieldCollection* getFieldNames() noexcept override; - IPluginV2IOExt* createPlugin(const char* name, const PluginFieldCollection* fc) override; + IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) noexcept override; - IPluginV2IOExt* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override; + IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override; - void setPluginNamespace(const char* libNamespace) override - { - mNamespace = libNamespace; - } + void setPluginNamespace(const char* libNamespace) noexcept override; - const char* getPluginNamespace() const override - { - return mNamespace.c_str(); - } + const char* getPluginNamespace() const noexcept override; private: std::string mNamespace; diff --git a/modules/plugin_factory.cpp b/modules/plugin_factory.cpp index 3e2209a..2a435f0 100644 --- a/modules/plugin_factory.cpp +++ b/modules/plugin_factory.cpp @@ -1,176 +1,171 @@ -/** -MIT License - -Copyright (c) 2018 NVIDIA CORPORATION. All rights reserved. - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -* -*/ + #include "plugin_factory.h" #include "trt_utils.h" -PluginFactory::PluginFactory() : m_ReorgLayer{nullptr}, m_RegionLayer{nullptr} -{ - for (int i = 0; i < m_MaxLeakyLayers; ++i) m_LeakyReLULayers[i] = nullptr; -} - -nvinfer1::IPlugin* PluginFactory::createPlugin(const char* layerName, const void* serialData, - size_t serialLength) -{ - assert(isPlugin(layerName)); - if (std::string(layerName).find("leaky") != std::string::npos) - { - assert(m_LeakyReLUCount >= 0 && m_LeakyReLUCount <= m_MaxLeakyLayers); - assert(m_LeakyReLULayers[m_LeakyReLUCount] == nullptr); - /*m_LeakyReLULayers[m_LeakyReLUCount] - = unique_ptr_INvPlugin(nvinfer1::plugin::createPReLUPlugin(serialData, serialLength));*/ - ++m_LeakyReLUCount; - return m_LeakyReLULayers[m_LeakyReLUCount - 1].get(); - } - else if (std::string(layerName).find("reorg") != std::string::npos) - { - assert(m_ReorgLayer == nullptr); - /*m_ReorgLayer = unique_ptr_INvPlugin( - nvinfer1::plugin::createYOLOReorgPlugin(serialData, serialLength));*/ - return m_ReorgLayer.get(); - } - else if (std::string(layerName).find("region") != std::string::npos) - { - assert(m_RegionLayer == nullptr); - /*m_RegionLayer = unique_ptr_INvPlugin( - nvinfer1::plugin::createYOLORegionPlugin(serialData, serialLength));*/ - return m_RegionLayer.get(); - } - else if (std::string(layerName).find("yolo") != std::string::npos) - { - assert(m_YoloLayerCount >= 0 && m_YoloLayerCount < m_MaxYoloLayers); - assert(m_YoloLayers[m_YoloLayerCount] == nullptr); - m_YoloLayers[m_YoloLayerCount] - = unique_ptr_IPlugin(new YoloLayerV3(serialData, serialLength)); - ++m_YoloLayerCount; - return m_YoloLayers[m_YoloLayerCount - 1].get(); - } - else - { - std::cerr << "ERROR: Unrecognised layer : " << layerName << std::endl; - assert(0); - return nullptr; - } -} - -bool PluginFactory::isPlugin(const char* name) -{ - return ((std::string(name).find("leaky") != std::string::npos) - || (std::string(name).find("reorg") != std::string::npos) - || (std::string(name).find("region") != std::string::npos) - || (std::string(name).find("yolo") != std::string::npos)); -} - -void PluginFactory::destroy() -{ - m_ReorgLayer.reset(); - m_RegionLayer.reset(); - - for (int i = 0; i < m_MaxLeakyLayers; ++i) - { - m_LeakyReLULayers[i].reset(); - } - - for (int i = 0; i < m_MaxYoloLayers; ++i) - { - m_YoloLayers[i].reset(); - } - - m_LeakyReLUCount = 0; - m_YoloLayerCount = 0; -} - /******* Yolo Layer V3 *******/ /*****************************/ -YoloLayerV3::YoloLayerV3(const void* data, size_t length) -{ - const char *d = static_cast(data), *a = d; - read(d, m_NumBoxes); - read(d, m_NumClasses); - read(d,_n_grid_h); - read(d,_n_grid_w); - read(d, m_OutputSize); - assert(d = a + length); -} - -YoloLayerV3::YoloLayerV3(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_,const uint32_t &grid_w_): - m_NumBoxes(numBoxes), - m_NumClasses(numClasses), - _n_grid_h(grid_h_), - _n_grid_w(grid_w_) -{ - assert(m_NumBoxes > 0); - assert(m_NumClasses > 0); - assert(_n_grid_h > 0); - assert(_n_grid_w > 0); - m_OutputSize = _n_grid_h * _n_grid_w * (m_NumBoxes * (4 + 1 + m_NumClasses)); -} - -int YoloLayerV3::getNbOutputs() const { return 1; } - -nvinfer1::Dims YoloLayerV3::getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nbInputDims) -{ - assert(index == 0); - assert(nbInputDims == 1); - return inputs[0]; -} - -void YoloLayerV3::configure(const nvinfer1::Dims* inputDims, int nbInputs, - const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize) -{ - assert(nbInputs == 1); - assert(inputDims != nullptr); -} - -int YoloLayerV3::initialize() { return 0; } - -void YoloLayerV3::terminate() {} - -size_t YoloLayerV3::getWorkspaceSize(int maxBatchSize) const { return 0; } - -int YoloLayerV3::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, - cudaStream_t stream) +namespace nvinfer1 { - NV_CUDA_CHECK(cudaYoloLayerV3(inputs[0], outputs[0], batchSize,_n_grid_h,_n_grid_w, m_NumClasses, - m_NumBoxes, m_OutputSize, stream)); - return 0; -} + YoloLayer::YoloLayer() + {} + + YoloLayer::YoloLayer(const void* data, size_t length) + { + const char *d = static_cast(data), *a = d; + re(d, m_NumBoxes); + re(d, m_NumClasses); + re(d, _n_grid_h); + re(d, _n_grid_w); + re(d, m_OutputSize); + assert(d = a + length); + } + void YoloLayer::serialize(void* buffer)const noexcept + { + char *d = static_cast(buffer), *a = d; + wr(d, m_NumBoxes); + wr(d, m_NumClasses); + wr(d, _n_grid_h); + wr(d, _n_grid_w); + wr(d, m_OutputSize); + assert(d == a + getSerializationSize()); + } + + bool YoloLayer::supportsFormat(DataType type, PluginFormat format) const noexcept + { + return (type == DataType::kFLOAT && format == PluginFormat::kLINEAR); + } + + void YoloLayer::configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept + { + } + + IPluginV2* YoloLayer::clone() const noexcept + { + YoloLayer *p = new YoloLayer(m_NumBoxes,m_NumClasses,_n_grid_h,_n_grid_w); + p->setPluginNamespace(_s_plugin_namespace.c_str()); + return p; + } + + YoloLayer::YoloLayer(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_, const uint32_t &grid_w_) : + m_NumBoxes(numBoxes), + m_NumClasses(numClasses), + _n_grid_h(grid_h_), + _n_grid_w(grid_w_) + { + assert(m_NumBoxes > 0); + assert(m_NumClasses > 0); + assert(_n_grid_h > 0); + assert(_n_grid_w > 0); + m_OutputSize = _n_grid_h * _n_grid_w * (m_NumBoxes * (4 + 1 + m_NumClasses)); + } + + int YoloLayer::getNbOutputs() const noexcept { return 1; } + + nvinfer1::Dims YoloLayer::getOutputDimensions(int index, const nvinfer1::Dims* inputs, + int nbInputDims) noexcept + { + assert(index == 0); + assert(nbInputDims == 1); + return inputs[0]; + } + + //void YoloLayerV3::configure(const nvinfer1::Dims* inputDims, int nbInputs, + // const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize) noexcept + //{ + // assert(nbInputs == 1); + // assert(inputDims != nullptr); + //} + + int YoloLayer::initialize() noexcept { return 0; } + + void YoloLayer::terminate() noexcept {} + + size_t YoloLayer::getWorkspaceSize(int maxBatchSize) const noexcept + { + return 0; + } + + int YoloLayer::enqueue(int batchSize, + const void* const* inputs, + void* const* outputs, + void* workspace, + cudaStream_t stream) noexcept + { + NV_CUDA_CHECK(cudaYoloLayerV3(inputs[0], outputs[0], batchSize, _n_grid_h, _n_grid_w, m_NumClasses, + m_NumBoxes, m_OutputSize, stream)); + return 0; + } + + int YoloLayer::enqueue(int batchSize, + const void* const* inputs, + void** outputs, + void* workspace, + cudaStream_t stream) noexcept + { + return enqueue(batchSize, inputs, (void* const*)outputs, workspace, stream); + } + + size_t YoloLayer::getSerializationSize()const noexcept + { + return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(_n_grid_w) + sizeof(_n_grid_h) + sizeof(m_OutputSize); + } + + + + + PluginFieldCollection YoloLayerPluginCreator::mFC{}; + std::vector YoloLayerPluginCreator::mPluginAttributes; + + YoloLayerPluginCreator::YoloLayerPluginCreator() + { + mPluginAttributes.clear(); + + mFC.nbFields = mPluginAttributes.size(); + mFC.fields = mPluginAttributes.data(); + } + + const char* YoloLayerPluginCreator::getPluginName() const noexcept + { + return "YOLO_TRT"; + } + + const char* YoloLayerPluginCreator::getPluginVersion() const noexcept + { + return "1.0"; + } + + const PluginFieldCollection* YoloLayerPluginCreator::getFieldNames()noexcept + { + return &mFC; + } + + IPluginV2* YoloLayerPluginCreator::createPlugin(const char* name, const PluginFieldCollection* fc)noexcept + { + YoloLayer* obj = new YoloLayer(); + obj->setPluginNamespace(mNamespace.c_str()); + return obj; + } + + IPluginV2* YoloLayerPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)noexcept + { + // This object will be deleted when the network is destroyed, which will + // call MishPlugin::destroy() + YoloLayer* obj = new YoloLayer(serialData, serialLength); + obj->setPluginNamespace(mNamespace.c_str()); + return obj; + } + + + void YoloLayerPluginCreator::setPluginNamespace(const char* libNamespace)noexcept + { + mNamespace = libNamespace; + } + + const char* YoloLayerPluginCreator::getPluginNamespace() const noexcept + { + return mNamespace.c_str(); + } -size_t YoloLayerV3::getSerializationSize() -{ - return sizeof(m_NumBoxes) + sizeof(m_NumClasses) + sizeof(_n_grid_w)+sizeof(_n_grid_h) + sizeof(m_OutputSize); -} -void YoloLayerV3::serialize(void* buffer) -{ - char *d = static_cast(buffer), *a = d; - write(d, m_NumBoxes); - write(d, m_NumClasses); - write(d,_n_grid_h); - write(d,_n_grid_w); - write(d, m_OutputSize); - assert(d == a + getSerializationSize()); } diff --git a/modules/plugin_factory.h b/modules/plugin_factory.h index fe76c39..8223aef 100644 --- a/modules/plugin_factory.h +++ b/modules/plugin_factory.h @@ -31,8 +31,8 @@ SOFTWARE. #include #include #include - -#include "NvInferPlugin.h" +#include +#include "NvInfer.h" #define NV_CUDA_CHECK(status) \ { \ @@ -50,96 +50,161 @@ cudaError_t cudaYoloLayerV3(const void* input, void* output, const uint32_t& bat const uint32_t& numOutputClasses, const uint32_t& numBBoxes, uint64_t outputSize, cudaStream_t stream); -class PluginFactory : public nvinfer1::IPluginFactory -{ - -public: - PluginFactory(); - nvinfer1::IPlugin* createPlugin(const char* layerName, const void* serialData, - size_t serialLength) override; - bool isPlugin(const char* name); - void destroy(); - -private: - static const int m_MaxLeakyLayers = 72; - static const int m_ReorgStride = 2; - static constexpr float m_LeakyNegSlope = 0.1f; - static const int m_NumBoxes = 5; - static const int m_NumCoords = 4; - static const int m_NumClasses = 80; - static const int m_MaxYoloLayers = 3; - int m_LeakyReLUCount = 0; - int m_YoloLayerCount = 0; - nvinfer1::plugin::RegionParameters m_RegionParameters{m_NumBoxes, m_NumCoords, m_NumClasses, - nullptr}; - - struct INvPluginDeleter - { - void operator()(nvinfer1::plugin::INvPlugin* ptr) - { - if (ptr) - { - ptr->destroy(); - } - } - }; - struct IPluginDeleter - { - void operator()(nvinfer1::IPlugin* ptr) - { - if (ptr) - { - ptr->terminate(); - } - } - }; - typedef std::unique_ptr unique_ptr_INvPlugin; - typedef std::unique_ptr unique_ptr_IPlugin; - - unique_ptr_INvPlugin m_ReorgLayer; - unique_ptr_INvPlugin m_RegionLayer; - unique_ptr_INvPlugin m_LeakyReLULayers[m_MaxLeakyLayers]; - unique_ptr_IPlugin m_YoloLayers[m_MaxYoloLayers]; -}; - -class YoloLayerV3 : public nvinfer1::IPlugin +//class PluginFactory : public nvinfer1::IPluginFactory +//{ +// +//public: +// PluginFactory(); +// nvinfer1::IPlugin* createPlugin(const char* layerName, const void* serialData, +// size_t serialLength); +// bool isPlugin(const char* name); +// void destroy(); +// +//private: +// static const int m_MaxLeakyLayers = 72; +// static const int m_ReorgStride = 2; +// static constexpr float m_LeakyNegSlope = 0.1f; +// static const int m_NumBoxes = 5; +// static const int m_NumCoords = 4; +// static const int m_NumClasses = 80; +// static const int m_MaxYoloLayers = 3; +// int m_LeakyReLUCount = 0; +// int m_YoloLayerCount = 0; +// nvinfer1::plugin::RegionParameters m_RegionParameters{m_NumBoxes, m_NumCoords, m_NumClasses, +// nullptr}; +// +// struct INvPluginDeleter +// { +// void operator()(nvinfer1::plugin::INvPlugin* ptr) +// { +// if (ptr) +// { +// ptr->destroy(); +// } +// } +// }; +// struct IPluginDeleter +// { +// void operator()(nvinfer1::IPlugin* ptr) +// { +// if (ptr) +// { +// ptr->terminate(); +// } +// } +// }; +// typedef std::unique_ptr unique_ptr_INvPlugin; +// typedef std::unique_ptr unique_ptr_IPlugin; +// +// unique_ptr_INvPlugin m_ReorgLayer; +// unique_ptr_INvPlugin m_RegionLayer; +// unique_ptr_INvPlugin m_LeakyReLULayers[m_MaxLeakyLayers]; +// unique_ptr_IPlugin m_YoloLayers[m_MaxYoloLayers]; +//}; +namespace nvinfer1 { -public: - YoloLayerV3(const void* data, size_t length); - YoloLayerV3(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_,const uint32_t &grid_w_); - int getNbOutputs() const override; - nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, - int nbInputDims) override; - void configure(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, - int nbOutputs, int maxBatchSize) override; - int initialize() override; - void terminate() override; - size_t getWorkspaceSize(int maxBatchSize) const override; - int enqueue(int batchSize, const void* const* intputs, void** outputs, void* workspace, - cudaStream_t stream) override; - size_t getSerializationSize() override; - void serialize(void* buffer) override; - -private: - template - void write(char*& buffer, const T& val) - { - *reinterpret_cast(buffer) = val; - buffer += sizeof(T); - } - - template - void read(const char*& buffer, T& val) - { - val = *reinterpret_cast(buffer); - buffer += sizeof(T); - } - uint32_t m_NumBoxes; - uint32_t m_NumClasses; - uint32_t m_GridSize; - uint64_t m_OutputSize; - uint32_t _n_grid_h; - uint32_t _n_grid_w; -}; - + template + void wr(char*& buffer, const T& val) + { + *reinterpret_cast(buffer) = val; + buffer += sizeof(T); + } + + template + void re(const char*& buffer, T& val) + { + val = *reinterpret_cast(buffer); + buffer += sizeof(T); + } + + class YoloLayer : public IPluginV2 + { + public: + explicit YoloLayer(); + YoloLayer(const void* data, size_t length); + YoloLayer(const uint32_t& numBoxes, const uint32_t& numClasses, const uint32_t& grid_h_, const uint32_t &grid_w_); + int getNbOutputs() const noexcept override; + nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, + int nbInputDims)noexcept override; + /*void configure(const nvinfer1::Dims* inputDims, int nbInputs, const nvinfer1::Dims* outputDims, + int nbOutputs, int maxBatchSize)noexcept override;*/ + + /*void configure(const nvinfer1::Dims* inputDims, int nbInputs, + const nvinfer1::Dims* outputDims, int nbOutputs, int maxBatchSize)noexcept override;*/ + + int initialize()noexcept override; + void terminate()noexcept override; + size_t getWorkspaceSize(int maxBatchSize) const noexcept override; + + int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) noexcept; + int enqueue(int batchSize, const void* const* inputs, void* const* outputs, void* workspace, cudaStream_t stream) noexcept; + + size_t getSerializationSize() const noexcept override; + void serialize(void* buffer) const noexcept override; + + const char* getPluginType() const noexcept override + { + return "YOLO_TRT"; + } + bool supportsFormat(DataType type, PluginFormat format) const noexcept override; + void configureWithFormat(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, DataType type, PluginFormat format, int maxBatchSize) noexcept override; + + const char* getPluginVersion() const noexcept override + { + return "1.0"; + } + + void setPluginNamespace(const char* pluginNamespace) noexcept override + { + _s_plugin_namespace = pluginNamespace; + } + const char* getPluginNamespace() const noexcept override + { + return _s_plugin_namespace.c_str(); + } + void destroy() noexcept override + { + delete this; + } + IPluginV2* clone() const noexcept override; + private: + + std::string _s_plugin_namespace; + uint32_t m_NumBoxes; + uint32_t m_NumClasses; + uint32_t m_GridSize; + uint64_t m_OutputSize; + uint32_t _n_grid_h; + uint32_t _n_grid_w; + }; + + + + class YoloLayerPluginCreator : public IPluginCreator + { + public: + YoloLayerPluginCreator(); + + ~YoloLayerPluginCreator() override = default; + + const char* getPluginName() const noexcept override; + + const char* getPluginVersion() const noexcept override; + + const PluginFieldCollection* getFieldNames() noexcept override; + + IPluginV2* createPlugin(const char* name, const PluginFieldCollection* fc) noexcept override; + + IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) noexcept override; + + void setPluginNamespace(const char* libNamespace) noexcept override; + + const char* getPluginNamespace() const noexcept override; + + private: + std::string mNamespace; + static PluginFieldCollection mFC; + static std::vector mPluginAttributes; + }; +} #endif // __PLUGIN_LAYER_H__ diff --git a/modules/trt_utils.cpp b/modules/trt_utils.cpp index efa8762..7750600 100644 --- a/modules/trt_utils.cpp +++ b/modules/trt_utils.cpp @@ -25,25 +25,80 @@ SOFTWARE. #include "trt_utils.h" #include + +#ifdef HAVE_FILESYSTEM +#include +namespace fs = std::filesystem; +#else #include +namespace fs = std::experimental::filesystem; +#endif + #include #include using namespace nvinfer1; REGISTER_TENSORRT_PLUGIN(MishPluginCreator); REGISTER_TENSORRT_PLUGIN(ChunkPluginCreator); REGISTER_TENSORRT_PLUGIN(HardswishPluginCreator); +REGISTER_TENSORRT_PLUGIN(YoloLayerPluginCreator); -cv::Mat blobFromDsImages(const std::vector& inputImages, - const int& inputH, - const int& inputW) +void blobFromDsImages(const std::vector& inputImages, cv::Mat& blob, const int& inputH, const int& inputW) { - std::vector letterboxStack(inputImages.size()); +#if 0 + std::vector letterboxStack; + letterboxStack.reserve(inputImages.size()); for (uint32_t i = 0; i < inputImages.size(); ++i) { - inputImages.at(i).getLetterBoxedImage().copyTo(letterboxStack.at(i)); + letterboxStack.emplace_back(inputImages[i].getLetterBoxedImage()); + } + blob = cv::dnn::blobFromImages(letterboxStack, 1.0, cv::Size(inputW, inputH), cv::Scalar(0.0, 0.0, 0.0), true); + +#else + cv::Size size(inputW, inputH); + constexpr bool swapRB = true; + constexpr int ddepth = CV_32F; + constexpr int nch = 3; + size_t nimages = inputImages.size(); + + int sz[] = { (int)nimages, nch, inputH, inputW }; + blob.create(4, sz, ddepth); + cv::Mat ch[4]; + + for (size_t i = 0; i < nimages; ++i) + { + const cv::Mat& image = inputImages[i].getLetterBoxedImage(); + + for (int j = 0; j < nch; ++j) + { + ch[j] = cv::Mat(size, ddepth, blob.ptr((int)i, j)); + } + + if(swapRB) + std::swap(ch[0], ch[2]); + + for (int y = 0; y < image.rows; ++y) + { + const uchar* imPtr = image.ptr(y); + float* ch0 = ch[0].ptr(y); + float* ch1 = ch[1].ptr(y); + float* ch2 = ch[2].ptr(y); + constexpr size_t stepSize = 32; + for (int x = 0; x < image.cols; x += stepSize) + { + for (size_t k = 0; k < stepSize; ++k) + { + ch0[k] = static_cast(imPtr[0 + 3 * k]); + ch1[k] = static_cast(imPtr[1 + 3 * k]); + ch2[k] = static_cast(imPtr[2 + 3 * k]); + } + imPtr += 3 * stepSize; + ch0 += stepSize; + ch1 += stepSize; + ch2 += stepSize; + } + } } - return cv::dnn::blobFromImages(letterboxStack, 1.0, cv::Size(inputW, inputH), - cv::Scalar(0.0, 0.0, 0.0),true); +#endif } static void leftTrim(std::string& s) @@ -82,7 +137,7 @@ float clamp(const float val, const float minVal, const float maxVal) bool fileExists(const std::string fileName, bool verbose) { - if (!std::experimental::filesystem::exists(std::experimental::filesystem::path(fileName))) + if (!fs::exists(fs::path(fileName))) { if (verbose) std::cout << "File does not exist : " << fileName << std::endl; return false; @@ -104,10 +159,10 @@ BBox convertBBoxNetRes(const float& bx, const float& by, const float& bw, const b.y1 = y - bh / 2; b.y2 = y + bh / 2; - b.x1 = clamp(b.x1, 0, netW); - b.x2 = clamp(b.x2, 0, netW); - b.y1 = clamp(b.y1, 0, netH); - b.y2 = clamp(b.y2, 0, netH); + b.x1 = clamp(b.x1, 0.f, static_cast(netW)); + b.x2 = clamp(b.x2, 0.f, static_cast(netW)); + b.y1 = clamp(b.y1, 0.f, static_cast(netH)); + b.y2 = clamp(b.y2, 0.f, static_cast(netH)); return b; } @@ -127,9 +182,7 @@ void convertBBoxImgRes(const float scalingFactor, bbox.x2 /= scalingFactor; bbox.y1 /= scalingFactor; bbox.y2 /= scalingFactor; - std::cout << "convertBBoxImgRes" << std::endl; - - + std::cout << "convertBBoxImgRes" << std::endl; } void printPredictions(const BBoxInfo& b, const std::string& className) @@ -156,11 +209,9 @@ std::vector loadListFromTextFile(const std::string filename) { if (line.empty()) continue; - else list.push_back(trim(line)); } - return list; } @@ -323,7 +374,7 @@ std::vector nonMaximumSuppression(const float nmsThresh, std::vectordeserializeCudaEngine(modelMem, modelSize, pluginFactory); + = runtime->deserializeCudaEngine(modelMem, modelSize/*, pluginFactory*/); free(modelMem); runtime->destroy(); std::cout << "Loading Complete!" << std::endl; @@ -369,7 +420,7 @@ nvinfer1::ICudaEngine* loadTRTEngine(const std::string planFilePath, PluginFacto // file.ignore(15); // } //} -std::vector loadWeights(const std::string weightsFilePath, const std::string& networkType) +std::vector loadWeights(const std::string weightsFilePath, const std::string& /*networkType*/) { assert(fileExists(weightsFilePath)); std::cout << "Loading pre-trained weights..." << std::endl; @@ -427,13 +478,15 @@ void displayDimType(const nvinfer1::Dims d) std::cout << "(" << d.nbDims << ") "; for (int i = 0; i < d.nbDims; ++i) { - switch (d.type[i]) - { - case nvinfer1::DimensionType::kSPATIAL: std::cout << "kSPATIAL "; break; - case nvinfer1::DimensionType::kCHANNEL: std::cout << "kCHANNEL "; break; - case nvinfer1::DimensionType::kINDEX: std::cout << "kINDEX "; break; - case nvinfer1::DimensionType::kSEQUENCE: std::cout << "kSEQUENCE "; break; - } + + // switch (d.type[i]) + // { + ////nvinfer1::DimensionOperation:: + // case nvinfer1::DimensionOperation::kSPATIAL: std::cout << "kSPATIAL "; break; + // case nvinfer1::DimensionOperation::kCHANNEL: std::cout << "kCHANNEL "; break; + // case nvinfer1::DimensionOperation::kINDEX: std::cout << "kINDEX "; break; + // case nvinfer1::DimensionOperation::kSEQUENCE: std::cout << "kSEQUENCE "; break; + // } } std::cout << std::endl; } @@ -854,12 +907,12 @@ nvinfer1::ILayer * layer_conv(std::vector &trtWeights_, int size = n_filters_ * chw[0] * n_kernel_size_ * n_kernel_size_; nvinfer1::Weights convWt{ nvinfer1::DataType::kFLOAT, nullptr, size }; float *conv_wts = new float[size]; + assert(size == (vec_wts.size())); std::vector &vec_wts = map_wts_[s_layer_name_ + ".weight"]; for (int i = 0; i < size; ++i) { conv_wts[i] = vec_wts[i]; } - assert(size == (map_wts_[s_layer_name_ + ".weight"].size())); convWt.values = conv_wts; nvinfer1::Weights convBias{ nvinfer1::DataType::kFLOAT, nullptr, 0 }; if (b_bias_) @@ -933,8 +986,6 @@ nvinfer1::ILayer * layer_bottleneck_csp(std::vector &trtWeigh const float e_ ) { std::vector chw=dims2chw(input_->getDimensions()); - //int c1 = dims2chw(input_->getDimensions())[0]; - int c1 = chw[0]; int c_ = int(c2_*0.5); //cv1 auto out = layer_conv_bn_act(trtWeights_, s_model_name_ +".cv1", map_wts_, input_, network_, c_, 1); @@ -988,8 +1039,7 @@ nvinfer1::ILayer * layer_spp(std::vector &trtWeights_, pool->setStrideNd(nvinfer1::DimsHW{1, 1}); concatInputs[ind + 1] = pool->getOutput(0); } - nvinfer1::IConcatenationLayer* concat - = network_->addConcatenation(concatInputs, (vec_args_.size()+1)); + nvinfer1::IConcatenationLayer* concat = network_->addConcatenation(concatInputs, static_cast(vec_args_.size()+1)); //concat->setAxis(0); assert(concat != nullptr); nvinfer1::ILayer *cv2 = layer_conv_bn_act(trtWeights_, s_model_name_ + ".cv2", map_wts_, concat->getOutput(0), network_, c2_, 1); @@ -997,6 +1047,61 @@ nvinfer1::ILayer * layer_spp(std::vector &trtWeights_, return cv2; } +nvinfer1::ILayer * layer_sppf(std::vector &trtWeights_, + std::string s_model_name_, + std::map> &map_wts_, + nvinfer1::INetworkDefinition* network_, + nvinfer1::ITensor* input_, + const int c2_, + int k_) +{ + std::vector chw = dims2chw(input_->getDimensions()); + int c1 = chw[0];//dims2chw(input_->getDimensions())[0]; + int c_ = c1 / 2; + nvinfer1::ILayer * x = layer_conv_bn_act(trtWeights_, s_model_name_ + ".cv1", map_wts_, input_, network_, c_, 1); + nvinfer1::ITensor** concatInputs + = reinterpret_cast(malloc(sizeof(nvinfer1::ITensor*) * 4)); + concatInputs[0] = x->getOutput(0); + + //y1 + nvinfer1::IPoolingLayer* y1 + = network_->addPoolingNd(*x->getOutput(0), + nvinfer1::PoolingType::kMAX, + nvinfer1::DimsHW{ k_,k_ }); + assert(y1); + int pad = k_ / 2; + y1->setPaddingNd(nvinfer1::DimsHW{ pad,pad }); + y1->setStrideNd(nvinfer1::DimsHW{ 1, 1 }); + concatInputs[1] = y1->getOutput(0); + + //y2 + nvinfer1::IPoolingLayer* y2 + = network_->addPoolingNd(*y1->getOutput(0), + nvinfer1::PoolingType::kMAX, + nvinfer1::DimsHW{ k_,k_ }); + assert(y2); + y2->setPaddingNd(nvinfer1::DimsHW{ pad,pad }); + y2->setStrideNd(nvinfer1::DimsHW{ 1, 1 }); + concatInputs[2] = y2->getOutput(0); + + //y3 + nvinfer1::IPoolingLayer* y3 + = network_->addPoolingNd(*y2->getOutput(0), + nvinfer1::PoolingType::kMAX, + nvinfer1::DimsHW{ k_,k_ }); + assert(y3); + y3->setPaddingNd(nvinfer1::DimsHW{ pad,pad }); + y3->setStrideNd(nvinfer1::DimsHW{ 1, 1 }); + concatInputs[3] = y3->getOutput(0); + + nvinfer1::IConcatenationLayer* concat + = network_->addConcatenation(concatInputs, 4); + //concat->setAxis(0); + assert(concat != nullptr); + nvinfer1::ILayer *cv2 = layer_conv_bn_act(trtWeights_, s_model_name_ + ".cv2", map_wts_, concat->getOutput(0), network_, c2_, 1); + assert(cv2 != nullptr); + return cv2; +} nvinfer1::ILayer *layer_upsample(std::string s_model_name_, std::map> &map_wts_, @@ -1032,104 +1137,11 @@ nvinfer1::ILayer * layer_conv_bn_act(std::vector &trtWeights_ const bool b_bn_, const std::string s_act_) { - bool bias = (b_bn_ == true) ? false : true; - int pad = b_padding_ ? ((n_kernel_size_ - 1) / 2) : 0; std::vector chw = dims2chw(input_->getDimensions()); //conv - nvinfer1::ILayer * conv = nullptr; - if (0) - { - int size = n_filters_ * chw[0] * n_kernel_size_ * n_kernel_size_; - nvinfer1::Weights convWt{ nvinfer1::DataType::kFLOAT, nullptr, size }; - std::vector &vec_conv_wts = map_wts_[s_layer_name_ + ".conv.weight"]; - float *conv_wts = new float[size]; - for (int i = 0; i < size; ++i) - { - conv_wts[i] = vec_conv_wts[i]; - } - assert(size == vec_conv_wts.size()); - convWt.values = conv_wts; - nvinfer1::Weights convBias{ nvinfer1::DataType::kFLOAT, nullptr, 0 }; - nvinfer1::IConvolutionLayer* conv = network_->addConvolutionNd( - *input_, - n_filters_, - nvinfer1::DimsHW{ n_kernel_size_, n_kernel_size_ }, - convWt, - convBias); - assert(conv != nullptr); - conv->setPaddingNd(nvinfer1::DimsHW{ pad,pad }); - conv->setStrideNd(nvinfer1::DimsHW{ n_stride_ ,n_stride_ }); - conv->setNbGroups(group_); - if ((!b_bn_) && ("" == s_act_)) - { - return conv; - } - } - else - { - conv = layer_conv(trtWeights_,s_layer_name_ + ".conv", map_wts_, input_, network_, n_filters_, n_kernel_size_, n_stride_); - } - nvinfer1::ILayer* bn = nullptr; - if (0) - { - std::vector &bn_wts = map_wts_[s_layer_name_ + ".bn.weight"]; - /*for (int i = 0; i < n_filters_; ++i) - { - bn_wts.push_back([i]); - }*/ - std::vector &bn_bias = map_wts_[s_layer_name_ + ".bn.bias"]; - /*for (int i = 0; i < n_filters_; ++i) - { - bn_bias.push_back(vec_wts_[s_layer_name_ + ".bias.weight"][i]); - }*/ - std::vector &bn_mean = map_wts_[s_layer_name_ + ".bn.running_mean"]; - /*for (int i = 0; i < n_filters_; ++i) - { - bn_mean.push_back(vec_wts_[s_layer_name_ + ".running_mean.weight"][i]); - }*/ - std::vector &bn_var = map_wts_[s_layer_name_ + ".bn.running_var"]; - for (int i = 0; i < n_filters_; ++i) - { - bn_var[i] = sqrt(bn_var[i] + 1.0e-5); - } - //float bn_num_batches_tracked = map_wts_[s_layer_name_ + ".bn.num_batches_tracked"][0]; - - // create the weights - nvinfer1::Weights shift{ nvinfer1::DataType::kFLOAT, nullptr, n_filters_ }; - nvinfer1::Weights scale{ nvinfer1::DataType::kFLOAT, nullptr, n_filters_ }; - nvinfer1::Weights power{ nvinfer1::DataType::kFLOAT, nullptr, n_filters_ }; - float* shiftWt = new float[n_filters_]; - for (int i = 0; i < n_filters_; ++i) - { - shiftWt[i] - = bn_bias.at(i) - ((bn_mean.at(i) * bn_wts.at(i)) / bn_var.at(i)); - } - shift.values = shiftWt; - float* scaleWt = new float[n_filters_]; - for (int i = 0; i < n_filters_; ++i) - { - scaleWt[i] = bn_wts.at(i) / bn_var[i]; - } - scale.values = scaleWt; - float* powerWt = new float[n_filters_]; - for (int i = 0; i < n_filters_; ++i) - { - powerWt[i] = 1.0; - } - power.values = powerWt; - // Add the batch norm layers - bn = network_->addScale(*conv->getOutput(0), nvinfer1::ScaleMode::kCHANNEL, shift, scale, power); - assert(bn != nullptr); - if ("" == s_act_) - { - return bn; - } - } - else - { - bn = layer_bn(trtWeights_, s_layer_name_, map_wts_, conv->getOutput(0), n_filters_, network_); - }//end bn + nvinfer1::ILayer *conv = layer_conv(trtWeights_, s_layer_name_ + ".conv", map_wts_, input_, network_, n_filters_, n_kernel_size_, n_stride_); + nvinfer1::ILayer* bn = layer_bn(trtWeights_, s_layer_name_, map_wts_, conv->getOutput(0), n_filters_, network_); nvinfer1::ILayer * act = layer_act(bn->getOutput(0), network_,s_act_); return act; } @@ -1167,10 +1179,8 @@ nvinfer1::ILayer* layer_focus(std::vector &trtWeights_, nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, std::map& block, std::vector& weights, - std::vector& trtWeights, - int& weightPtr, - int& inputChannels, - nvinfer1::ITensor* input, + std::vector& trtWeights, int& weightPtr, + int& inputChannels, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "convolutional"); @@ -1252,11 +1262,7 @@ nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, nvinfer1::Weights convBias{nvinfer1::DataType::kFLOAT, nullptr, 0}; trtWeights.push_back(convBias); nvinfer1::IConvolutionLayer* conv = network->addConvolution( - *input, - filters, - nvinfer1::DimsHW{kernelSize, kernelSize}, - convWt, - convBias); + *input, filters, nvinfer1::DimsHW{kernelSize, kernelSize}, convWt, convBias); assert(conv != nullptr); std::string convLayerName = "conv_" + std::to_string(layerIdx); conv->setName(convLayerName.c_str()); @@ -1314,8 +1320,8 @@ nvinfer1::ILayer* netAddConvBNLeaky(int layerIdx, } nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map& block, - std::vector& weights, - std::vector& trtWeights, int& inputChannels, + std::vector& /*weights*/, + std::vector& trtWeights, int& /*inputChannels*/, nvinfer1::ITensor* input, nvinfer1::INetworkDefinition* network) { assert(block.at("type") == "upsample"); @@ -1417,8 +1423,6 @@ nvinfer1::ILayer* netAddUpsample(int layerIdx, std::map #include -#include +#include #include "NvInfer.h" #include "ds_image.h" @@ -49,15 +49,18 @@ SOFTWARE. class DsImage; struct BBox { - float x1, y1, x2, y2; + float x1 = 0; + float y1 = 0; + float x2 = 0; + float y2 = 0; }; struct BBoxInfo { BBox box; - int label; - int classId; // For coco benchmarking - float prob; + int label = 0; + int classId = 0; // For coco benchmarking + float prob = 0; }; class Logger : public nvinfer1::ILogger @@ -65,19 +68,17 @@ class Logger : public nvinfer1::ILogger public: Logger(Severity severity = Severity::kWARNING) { - + severity = severity; } - ~Logger() - { + ~Logger() = default; - } nvinfer1::ILogger& getTRTLogger() { return *this; } - void log(nvinfer1::ILogger::Severity severity, const char* msg) override + void log(nvinfer1::ILogger::Severity severity, const char* msg) noexcept override { // suppress info-level messages if (severity == Severity::kINFO) return; @@ -94,44 +95,43 @@ class Logger : public nvinfer1::ILogger } }; -class YoloTinyMaxpoolPaddingFormula : public nvinfer1::IOutputDimensionsFormula -{ - -private: - std::set m_SamePaddingLayers; - - nvinfer1::DimsHW compute(nvinfer1::DimsHW inputDims, nvinfer1::DimsHW kernelSize, - nvinfer1::DimsHW stride, nvinfer1::DimsHW padding, - nvinfer1::DimsHW dilation, const char* layerName) const override - { - // assert(inputDims.d[0] == inputDims.d[1]); - assert(kernelSize.d[0] == kernelSize.d[1]); - assert(stride.d[0] == stride.d[1]); - assert(padding.d[0] == padding.d[1]); - - int output_h, output_w; - // Only layer maxpool_12 makes use of same padding - if (m_SamePaddingLayers.find(layerName) != m_SamePaddingLayers.end()) - { - output_h = (inputDims.d[0] + 2 * padding.d[0]) / stride.d[0]; - output_w = (inputDims.d[1] + 2 * padding.d[1]) / stride.d[1]; - } - // Valid Padding - else - { - output_h = (inputDims.d[0] - kernelSize.d[0]) / stride.d[0] + 1; - output_w = (inputDims.d[1] - kernelSize.d[1]) / stride.d[1] + 1; - } - return nvinfer1::DimsHW{output_h, output_w}; - } - -public: - void addSamePaddingLayer(std::string input) { m_SamePaddingLayers.insert(input); } -}; +//class YoloTinyMaxpoolPaddingFormula : public nvinfer1::IOutputDimensionsFormula +//{ +// +//private: +// std::set m_SamePaddingLayers; +// +// nvinfer1::DimsHW compute(nvinfer1::DimsHW inputDims, nvinfer1::DimsHW kernelSize, +// nvinfer1::DimsHW stride, nvinfer1::DimsHW padding, +// nvinfer1::DimsHW dilation, const char* layerName) const override +// { +// // assert(inputDims.d[0] == inputDims.d[1]); +// assert(kernelSize.d[0] == kernelSize.d[1]); +// assert(stride.d[0] == stride.d[1]); +// assert(padding.d[0] == padding.d[1]); +// +// int output_h, output_w; +// // Only layer maxpool_12 makes use of same padding +// if (m_SamePaddingLayers.find(layerName) != m_SamePaddingLayers.end()) +// { +// output_h = (inputDims.d[0] + 2 * padding.d[0]) / stride.d[0]; +// output_w = (inputDims.d[1] + 2 * padding.d[1]) / stride.d[1]; +// } +// // Valid Padding +// else +// { +// output_h = (inputDims.d[0] - kernelSize.d[0]) / stride.d[0] + 1; +// output_w = (inputDims.d[1] - kernelSize.d[1]) / stride.d[1] + 1; +// } +// return nvinfer1::DimsHW{output_h, output_w}; +// } +// +//public: +// void addSamePaddingLayer(std::string input) { m_SamePaddingLayers.insert(input); } +//}; // Common helper functions -cv::Mat blobFromDsImages(const std::vector& inputImages, const int& inputH, - const int& inputW); +void blobFromDsImages(const std::vector& inputImages, cv::Mat& blob, const int& inputH, const int& inputW); std::string trim(std::string s); std::string triml(std::string s, const char* t); std::string trimr(std::string s, const char* t); @@ -150,7 +150,7 @@ std::vector diou_nms(const float numThresh, std::vector binf std::vector nmsAllClasses(const float nmsThresh, std::vector& binfo, const uint32_t numClasses, const std::string &model_type); std::vector nonMaximumSuppression(const float nmsThresh, std::vector binfo); -nvinfer1::ICudaEngine* loadTRTEngine(const std::string planFilePath, PluginFactory* pluginFactory, +nvinfer1::ICudaEngine* loadTRTEngine(const std::string planFilePath,/* PluginFactory* pluginFactory,*/ Logger& logger); std::vector loadWeights(const std::string weightsFilePath, const std::string& networkType); std::string dimsToString(const nvinfer1::Dims d); @@ -250,6 +250,14 @@ nvinfer1::ILayer * layer_spp(std::vector &trtWeights_, const int c2_, const std::vector &vec_args_); +nvinfer1::ILayer * layer_sppf(std::vector &trtWeights_, + std::string s_model_name_, + std::map> &map_wts_, + nvinfer1::INetworkDefinition* network_, + nvinfer1::ITensor* input_, + const int c2_, + int k_); + nvinfer1::ILayer *layer_upsample(std::string s_model_name_, std::map> &map_wts_, nvinfer1::INetworkDefinition* network_, diff --git a/modules/yolo.cpp b/modules/yolo.cpp index 2b94b20..be7e149 100644 --- a/modules/yolo.cpp +++ b/modules/yolo.cpp @@ -28,6 +28,7 @@ Yolo::Yolo( const NetworkInfo& networkInfo, const InferParams& inferParams) : m_NMSThresh(inferParams.nmsThresh), m_PrintPerfInfo(inferParams.printPerfInfo), m_PrintPredictions(inferParams.printPredictionInfo), + m_BatchSize(inferParams.batchSize), m_Logger(Logger()), m_Network(nullptr), m_Builder(nullptr), @@ -36,58 +37,40 @@ Yolo::Yolo( const NetworkInfo& networkInfo, const InferParams& inferParams) : m_Context(nullptr), m_InputBindingIndex(-1), m_CudaStream(nullptr), - m_PluginFactory(new PluginFactory), - m_TinyMaxpoolPaddingFormula(new YoloTinyMaxpoolPaddingFormula), _n_yolo_ind(0) { // m_ClassNames = loadListFromTextFile(m_LabelsFilePath); m_configBlocks = parseConfigFile(m_ConfigFilePath); if (m_NetworkType == "yolov5") - { parse_cfg_blocks_v5(m_configBlocks); - } else - { parseConfigBlocks(); - } + m_EnginePath = networkInfo.data_path + "-" + m_Precision + "-batch" + std::to_string(m_BatchSize) + ".engine"; if (m_Precision == "kFLOAT") { if ("yolov5" == m_NetworkType) - { - create_engine_yolov5(); - } else - { createYOLOEngine(); - } } else if (m_Precision == "kINT8") { Int8EntropyCalibrator calibrator(m_BatchSize, m_CalibImages, m_CalibImagesFilePath, m_CalibTableFilePath, m_InputSize, m_InputH, m_InputW, - m_InputBlobName,m_NetworkType); + m_InputBlobName, m_NetworkType); if ("yolov5" == m_NetworkType) - { create_engine_yolov5(nvinfer1::DataType::kINT8, &calibrator); - } else - { createYOLOEngine(nvinfer1::DataType::kINT8, &calibrator); - } } else if (m_Precision == "kHALF") { if ("yolov5" == m_NetworkType) - { create_engine_yolov5(nvinfer1::DataType::kHALF, nullptr); - } else - { createYOLOEngine(nvinfer1::DataType::kHALF, nullptr); - } } else { @@ -95,8 +78,8 @@ Yolo::Yolo( const NetworkInfo& networkInfo, const InferParams& inferParams) : assert(0); } - assert(m_PluginFactory != nullptr); - m_Engine = loadTRTEngine(m_EnginePath, m_PluginFactory, m_Logger); + //assert(m_PluginFactory != nullptr); + m_Engine = loadTRTEngine(m_EnginePath,/* m_PluginFactory,*/ m_Logger); assert(m_Engine != nullptr); m_Context = m_Engine->createExecutionContext(); assert(m_Context != nullptr); @@ -124,14 +107,6 @@ Yolo::~Yolo() m_Engine->destroy(); m_Engine = nullptr; } - - if (m_PluginFactory) - { - m_PluginFactory->destroy(); - m_PluginFactory = nullptr; - } - - m_TinyMaxpoolPaddingFormula.reset(); } std::vector split_layer_index(const std::string &s_,const std::string &delimiter_) @@ -169,15 +144,15 @@ void Yolo::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibr nvinfer1::ITensor* data = m_Network->addInput( m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, - nvinfer1::DimsCHW{static_cast(m_InputC), static_cast(m_InputH), - static_cast(m_InputW)}); + nvinfer1::Dims{ 3,static_cast(m_InputC), static_cast(m_InputH), + static_cast(m_InputW) }); assert(data != nullptr); // Add elementwise layer to normalize pixel values 0-1 nvinfer1::Dims divDims{ 3, - {static_cast(m_InputC), static_cast(m_InputH), static_cast(m_InputW)}, - {nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, - nvinfer1::DimensionType::kSPATIAL}}; + {static_cast(m_InputC), static_cast(m_InputH), static_cast(m_InputW)} + /*{nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, + nvinfer1::DimensionType::kSPATIAL}*/}; nvinfer1::Weights divWeights{nvinfer1::DataType::kFLOAT, nullptr, static_cast(m_InputSize)}; float* divWt = new float[m_InputSize]; @@ -194,13 +169,6 @@ void Yolo::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibr std::vector tensorOutputs; uint32_t outputTensorCount = 0; - if (/*"yolov3" == m_NetworkType || */"yolov3-tiny" == m_NetworkType) - { - // Set the output dimensions formula for pooling layers - assert(m_TinyMaxpoolPaddingFormula && "Tiny maxpool padding formula not created"); - m_Network->setPoolingOutputDimensionsFormula(m_TinyMaxpoolPaddingFormula.get()); - } - // build the network using the network API for (uint32_t i = 0; i < m_configBlocks.size(); ++i) { @@ -290,13 +258,14 @@ void Yolo::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibr * (curYoloTensor.numBBoxes * (5 + curYoloTensor.numClasses)); std::string layerName = "yolo_" + std::to_string(outputTensorCount); curYoloTensor.blobName = layerName; - nvinfer1::IPlugin* yoloPlugin - = new YoloLayerV3(m_OutputTensors.at(outputTensorCount).numBBoxes, + nvinfer1::IPluginV2* yoloPlugin + = new nvinfer1::YoloLayer(m_OutputTensors.at(outputTensorCount).numBBoxes, m_OutputTensors.at(outputTensorCount).numClasses, m_OutputTensors.at(outputTensorCount).grid_h, m_OutputTensors.at(outputTensorCount).grid_w); assert(yoloPlugin != nullptr); - nvinfer1::IPluginLayer* yolo = m_Network->addPlugin(&previous, 1, *yoloPlugin); + nvinfer1::IPluginV2Layer* yolo = m_Network->addPluginV2(&previous, 1, *yoloPlugin); + assert(yolo != nullptr); yolo->setName(layerName.c_str()); std::string inputVol = dimsToString(previous->getDimensions()); @@ -319,9 +288,7 @@ void Yolo::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibr for (auto &ind_layer:vec_index) { if (ind_layer < 0) - { ind_layer = static_cast(tensorOutputs.size()) + ind_layer; - } assert(ind_layer < static_cast(tensorOutputs.size()) && ind_layer >= 0); } nvinfer1::ITensor** concatInputs @@ -339,7 +306,7 @@ void Yolo::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibr concat->setAxis(0); previous = concat->getOutput(0); assert(previous != nullptr); - nvinfer1::Dims debug = previous->getDimensions(); + //nvinfer1::Dims debug = previous->getDimensions(); std::string outputVol = dimsToString(previous->getDimensions()); int nums = 0; for (auto &indx:vec_index) @@ -354,9 +321,7 @@ void Yolo::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibr { int idx = std::stoi(trim(m_configBlocks.at(i).at("layers"))); if (idx < 0) - { idx = static_cast(tensorOutputs.size()) + idx; - } assert(idx < static_cast(tensorOutputs.size()) && idx >= 0); //route @@ -369,15 +334,12 @@ void Yolo::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibr channels = getNumChannels(tensorOutputs[idx]); tensorOutputs.push_back(tensorOutputs[idx]); printLayerInfo(layerIndex, "route", " -", outputVol, std::to_string(weightPtr)); - } //yolov4-tiny route split layer else { if (m_configBlocks.at(i).find("group_id") == m_configBlocks.at(i).end()) - { assert(0); - } int chunk_idx = std::stoi(trim(m_configBlocks.at(i).at("group_id"))); nvinfer1::ILayer* out = layer_split(i, tensorOutputs[idx], m_Network); std::string inputVol = dimsToString(previous->getDimensions()); @@ -402,11 +364,6 @@ void Yolo::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibr } else if (m_configBlocks.at(i).at("type") == "maxpool") { - // Add same padding layers - if (m_configBlocks.at(i).at("size") == "2" && m_configBlocks.at(i).at("stride") == "1") - { - m_TinyMaxpoolPaddingFormula->addSamePaddingLayer("maxpool_" + std::to_string(i)); - } std::string inputVol = dimsToString(previous->getDimensions()); nvinfer1::ILayer* out = netAddMaxpool(i, m_configBlocks.at(i), previous, m_Network); previous = out->getOutput(0); @@ -463,11 +420,11 @@ void Yolo::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibr // m_Builder->setHalf2Mode(true); } - m_Builder->allowGPUFallback(true); + // m_Builder->allowGPUFallback(true); int nbLayers = m_Network->getNbLayers(); int layersOnDLA = 0; // std::cout << "Total number of layers: " << nbLayers << std::endl; - for (int i = 0; i < nbLayers; i++) + /* for (int i = 0; i < nbLayers; i++) { nvinfer1::ILayer* curLayer = m_Network->getLayer(i); if (m_DeviceType == "kDLA" && m_Builder->canRunOnDLA(curLayer)) @@ -476,7 +433,7 @@ void Yolo::createYOLOEngine(const nvinfer1::DataType dataType, Int8EntropyCalibr layersOnDLA++; std::cout << "Set layer " << curLayer->getName() << " to run on DLA" << std::endl; } - } + }*/ // std::cout << "Total number of layers on DLA: " << layersOnDLA << std::endl; // Build the engine @@ -652,8 +609,8 @@ void Yolo::create_engine_yolov5(const nvinfer1::DataType dataType, std::vector trtWeights; int channels = m_InputC; m_Builder = nvinfer1::createInferBuilder(m_Logger); - nvinfer1::IBuilderConfig* config = m_Builder->createBuilderConfig(); - m_Network = m_Builder->createNetworkV2(0U); + + m_Network = m_Builder->createNetworkV2(0); if ((dataType == nvinfer1::DataType::kINT8 && !m_Builder->platformHasFastInt8()) || (dataType == nvinfer1::DataType::kHALF && !m_Builder->platformHasFastFp16())) { @@ -663,15 +620,15 @@ void Yolo::create_engine_yolov5(const nvinfer1::DataType dataType, nvinfer1::ITensor* data = m_Network->addInput( m_InputBlobName.c_str(), nvinfer1::DataType::kFLOAT, - nvinfer1::DimsCHW{ static_cast(m_InputC), static_cast(m_InputH), + nvinfer1::Dims{3, static_cast(m_InputC), static_cast(m_InputH), static_cast(m_InputW) }); assert(data != nullptr); // Add elementwise layer to normalize pixel values 0-1 nvinfer1::Dims divDims{ 3, - { static_cast(m_InputC), static_cast(m_InputH), static_cast(m_InputW) }, + { static_cast(m_InputC), static_cast(m_InputH), static_cast(m_InputW) }/*, { nvinfer1::DimensionType::kCHANNEL, nvinfer1::DimensionType::kSPATIAL, - nvinfer1::DimensionType::kSPATIAL } }; + nvinfer1::DimensionType::kSPATIAL }*/ }; nvinfer1::Weights divWeights{ nvinfer1::DataType::kFLOAT, nullptr, @@ -790,6 +747,24 @@ void Yolo::create_engine_yolov5(const nvinfer1::DataType dataType, tensorOutputs.push_back(out->getOutput(0)); printLayerInfo(layerIndex, "SPP", inputVol, outputVol, ""); }//end SPP + else if ("SPPF" == m_configBlocks.at(i).at("type")) + { + std::string inputVol = dimsToString(previous->getDimensions()); + int filters = 0; + std::vector vec_k; + //parse_spp_args(m_configBlocks[i]["args"], filters, vec_k); + std::vector args = parse_int_list(m_configBlocks[i]["args"]); + filters = args[0]; + int n_out_channel = (n_output != filters) ? make_division(filters*_f_width_multiple, 8) : filters; + std::string s_model_name = "model." + std::to_string(i - 1); + auto out = layer_sppf(trtWeights, s_model_name, model_wts, m_Network, previous, n_out_channel, args[1]); + previous = out->getOutput(0); + assert(previous != nullptr); + channels = getNumChannels(previous); + std::string outputVol = dimsToString(previous->getDimensions()); + tensorOutputs.push_back(out->getOutput(0)); + printLayerInfo(layerIndex, "SPP", inputVol, outputVol, ""); + }//end SPPF else if ("nn.Upsample" == m_configBlocks.at(i).at("type")) { std::string inputVol = dimsToString(previous->getDimensions()); @@ -820,7 +795,7 @@ void Yolo::create_engine_yolov5(const nvinfer1::DataType dataType, concat_tensor[j] = tensorOutputs[vec_from[j]]; } nvinfer1::IConcatenationLayer* concat - =m_Network->addConcatenation(concat_tensor, vec_from.size()); + =m_Network->addConcatenation(concat_tensor, static_cast(vec_from.size())); concat->setAxis(n_dimension-1); assert(concat != nullptr); previous = concat->getOutput(0); @@ -840,7 +815,7 @@ void Yolo::create_engine_yolov5(const nvinfer1::DataType dataType, } std::vector vec_args = parse_str_list(m_configBlocks[i]["args"]); std::string s_model_name = "model." + std::to_string(i - 1); - for (size_t ind_from = 0; ind_from < vec_from.size(); ++ind_from) + for (size_t ind_from = 0; ind_from < vec_from.size(); ++ind_from) { int n_filters = (5 + _n_classes) * 3; int from = vec_from[ind_from]; @@ -901,6 +876,7 @@ void Yolo::create_engine_yolov5(const nvinfer1::DataType dataType, << " precision : " << m_Precision << " and batch size :" << m_BatchSize << std::endl;*/ m_Builder->setMaxBatchSize(m_BatchSize); + nvinfer1::IBuilderConfig* config = m_Builder->createBuilderConfig(); config->setMaxWorkspaceSize(1<<20); if (dataType == nvinfer1::DataType::kINT8) { @@ -917,20 +893,20 @@ void Yolo::create_engine_yolov5(const nvinfer1::DataType dataType, // m_Builder->setHalf2Mode(true); } - m_Builder->allowGPUFallback(true); - int nbLayers = m_Network->getNbLayers(); - int layersOnDLA = 0; - // std::cout << "Total number of layers: " << nbLayers << std::endl; - for (int i = 0; i < nbLayers; i++) - { - nvinfer1::ILayer* curLayer = m_Network->getLayer(i); - if (m_DeviceType == "kDLA" && m_Builder->canRunOnDLA(curLayer)) - { - m_Builder->setDeviceType(curLayer, nvinfer1::DeviceType::kDLA); - layersOnDLA++; - std::cout << "Set layer " << curLayer->getName() << " to run on DLA" << std::endl; - } - } +// m_Builder->allowGPUFallback(true); + //int nbLayers = m_Network->getNbLayers(); + //int layersOnDLA = 0; + //// std::cout << "Total number of layers: " << nbLayers << std::endl; + //for (int i = 0; i < nbLayers; i++) + //{ + // nvinfer1::ILayer* curLayer = m_Network->getLayer(i); + // if (m_DeviceType == "kDLA" && m_Builder->canRunOnDLA(curLayer)) + // { + // m_Builder->setDeviceType(curLayer, nvinfer1::DeviceType::kDLA); + // layersOnDLA++; + // std::cout << "Set layer " << curLayer->getName() << " to run on DLA" << std::endl; + // } + //} // std::cout << "Total number of layers on DLA: " << layersOnDLA << std::endl; // Build the engine @@ -971,9 +947,10 @@ void Yolo::load_weights_v5(const std::string s_weights_path_, } std::cout << "Loading complete!" << std::endl; } + void Yolo::doInference(const unsigned char* input, const uint32_t batchSize) { - Timer timer; + //Timer timer; assert(batchSize <= m_BatchSize && "Image batch size exceeds TRT engines batch size"); NV_CUDA_CHECK(cudaMemcpyAsync(m_DeviceBuffers.at(m_InputBindingIndex), input, batchSize * m_InputSize * sizeof(float), cudaMemcpyHostToDevice, @@ -987,21 +964,19 @@ void Yolo::doInference(const unsigned char* input, const uint32_t batchSize) cudaMemcpyDeviceToHost, m_CudaStream)); } cudaStreamSynchronize(m_CudaStream); - timer.out("inference"); + //timer.out("inference"); } std::vector Yolo::decodeDetections(const int& imageIdx, const int& imageH, const int& imageW) { -// Timer timer; std::vector binfo; for (auto& tensor : m_OutputTensors) { std::vector curBInfo = decodeTensor(imageIdx, imageH, imageW, tensor); binfo.insert(binfo.end(), curBInfo.begin(), curBInfo.end()); } -// timer.out("decodeDetections"); return binfo; } @@ -1028,14 +1003,14 @@ std::vector> Yolo::parseConfigFile(const std: } std::string key = "type"; std::string value = trim(line.substr(1, line.size() - 2)); - block.insert(std::pair(key, value)); + block.emplace(key, value); } else { size_t cpos = line.find('='); std::string key = trim(line.substr(0, cpos)); std::string value = trim(line.substr(cpos + 1)); - block.insert(std::pair(key, value)); + block.emplace(key, value); } } blocks.push_back(block); @@ -1059,7 +1034,8 @@ void Yolo::parseConfigBlocks() m_InputH = std::stoul(trim(block.at("height"))); m_InputW = std::stoul(trim(block.at("width"))); m_InputC = std::stoul(trim(block.at("channels"))); - m_BatchSize = std::stoi(trim(block.at("batch"))); + if (m_BatchSize < 1) + m_BatchSize = std::stoi(trim(block.at("batch"))); // assert(m_InputW == m_InputH); m_InputSize = m_InputC * m_InputH * m_InputW; } @@ -1147,7 +1123,7 @@ void Yolo::parseConfigBlocks() outputTensor.stride_w = m_InputW / outputTensor.grid_w; outputTensor.volume = outputTensor.grid_h* outputTensor.grid_w *(outputTensor.numBBoxes*(5 + outputTensor.numClasses)); - m_OutputTensors.push_back(outputTensor); + m_OutputTensors.push_back(outputTensor); _n_yolo_ind++; } } @@ -1177,7 +1153,8 @@ void Yolo::parse_cfg_blocks_v5(const std::vectorgetNbBindings(), nullptr); @@ -1285,8 +1262,10 @@ bool Yolo::verifyYoloEngine() { assert(!strcmp(m_Engine->getBindingName(tensor.bindingIndex), tensor.blobName.c_str()) && "Blobs names dont match between cfg and engine file \n"); - assert(get3DTensorVolume(m_Engine->getBindingDimensions(tensor.bindingIndex)) - == tensor.volume + auto volSize = get3DTensorVolume(m_Engine->getBindingDimensions(tensor.bindingIndex)); + if (volSize != tensor.volume) + std::cerr << "get3DTensorVolume[" << tensor.bindingIndex << "]: " << volSize << " != " << tensor.volume << std::endl; + assert(volSize == tensor.volume && "Tensor volumes dont match between cfg and engine file \n"); } diff --git a/modules/yolo.h b/modules/yolo.h index f122f2f..9bcd31d 100644 --- a/modules/yolo.h +++ b/modules/yolo.h @@ -27,7 +27,6 @@ SOFTWARE. #define _YOLO_H_ #include "calibrator.h" -#include "plugin_factory.h" #include "trt_utils.h" #include "NvInfer.h" @@ -66,12 +65,13 @@ struct NetworkInfo */ struct InferParams { - bool printPerfInfo; - bool printPredictionInfo; + bool printPerfInfo = false; + bool printPredictionInfo = false; std::string calibImages; std::string calibImagesPath; - float probThresh; - float nmsThresh; + float probThresh = 0.5f; + float nmsThresh = 0.5f; + uint32_t batchSize = 1; }; /** @@ -95,7 +95,6 @@ struct TensorInfo float* hostBuffer{nullptr}; }; - class Yolo { public: @@ -130,10 +129,10 @@ class Yolo const std::string m_InputBlobName; std::vector m_OutputTensors; std::vector> m_configBlocks; - uint32_t m_InputH; - uint32_t m_InputW; - uint32_t m_InputC; - uint64_t m_InputSize; + uint32_t m_InputH = 0; + uint32_t m_InputW = 0; + uint32_t m_InputC = 0; + uint64_t m_InputSize = 0; uint32_t _n_classes = 0; float _f_depth_multiple = 0; float _f_width_multiple = 0; @@ -152,23 +151,22 @@ class Yolo //Logger glogger; uint32_t m_BatchSize = 1; nvinfer1::INetworkDefinition* m_Network; - nvinfer1::IBuilder* m_Builder ; + nvinfer1::IBuilder* m_Builder; nvinfer1::IHostMemory* m_ModelStream; nvinfer1::ICudaEngine* m_Engine; nvinfer1::IExecutionContext* m_Context; std::vector m_DeviceBuffers; int m_InputBindingIndex; cudaStream_t m_CudaStream; - PluginFactory* m_PluginFactory; - std::unique_ptr m_TinyMaxpoolPaddingFormula; virtual std::vector decodeTensor(const int imageIdx, const int imageH, const int imageW, const TensorInfo& tensor) = 0; inline void addBBoxProposal(const float bx, const float by, const float bw, const float bh, - const uint32_t stride, const float scalingFactor, const float xOffset, - const float yOffset, const int maxIndex, const float maxProb, + const uint32_t stride, const float scalingFactor, + const float /*xOffset*/, const float /*yOffset*/, + const int maxIndex, const float maxProb, const uint32_t image_w, const uint32_t image_h, std::vector& binfo) { @@ -217,10 +215,10 @@ class Yolo b.y1 = y - bh / 2; b.y2 = y + bh / 2; - b.x1 = clamp(b.x1, 0, netW); - b.x2 = clamp(b.x2, 0, netW); - b.y1 = clamp(b.y1, 0, netH); - b.y2 = clamp(b.y2, 0, netH); + b.x1 = clamp(b.x1, 0.f, static_cast(netW)); + b.x2 = clamp(b.x2, 0.f, static_cast(netW)); + b.y1 = clamp(b.y1, 0.f, static_cast(netH)); + b.y2 = clamp(b.y2, 0.f, static_cast(netH)); return b; } @@ -286,10 +284,10 @@ class Yolo void writePlanFileToDisk(); private: - Timer _timer; - void load_weights_v5(const std::string s_weights_path_, std::map> &vec_wts_); + Timer _timer; + void load_weights_v5(const std::string s_weights_path_, std::map> &vec_wts_); - int _n_yolo_ind = 0; + int _n_yolo_ind = 0; }; #endif // _YOLO_H_ diff --git a/modules/yoloplugin_lib.cpp b/modules/yoloplugin_lib.cpp index ddec85a..af3d46c 100644 --- a/modules/yoloplugin_lib.cpp +++ b/modules/yoloplugin_lib.cpp @@ -107,7 +107,7 @@ YoloPluginCtx* YoloPluginCtxInit(YoloPluginInitParams* initParams, size_t batchS ctx->batchSize = batchSize; ctx->networkInfo;// = getYoloNetworkInfo(); ctx->inferParams;// = getYoloInferParams(); - uint32_t configBatchSize;// = getBatchSize(); + uint32_t configBatchSize = 0;// = getBatchSize(); // Check if config batchsize matches buffer batch size in the pipeline if (ctx->batchSize != configBatchSize) @@ -115,7 +115,7 @@ YoloPluginCtx* YoloPluginCtxInit(YoloPluginInitParams* initParams, size_t batchS std::cerr << "WARNING: Batchsize set in config file overriden by pipeline. New batchsize is " << ctx->batchSize << std::endl; - int npos = ctx->networkInfo.wtsFilePath.find(".weights"); + auto npos = ctx->networkInfo.wtsFilePath.find(".weights"); assert(npos != std::string::npos && "wts file file not recognised. File needs to be of '.weights' format"); std::string dataPath = ctx->networkInfo.wtsFilePath.substr(0, npos); diff --git a/modules/yoloplugin_lib.h b/modules/yoloplugin_lib.h index a8ad2f2..32ef742 100644 --- a/modules/yoloplugin_lib.h +++ b/modules/yoloplugin_lib.h @@ -59,7 +59,7 @@ struct YoloPluginCtx Yolo* inferenceNetwork; // perf vars - float inferTime = 0.0, preTime = 0.0, postTime = 0.0; + double inferTime = 0.0, preTime = 0.0, postTime = 0.0; uint32_t batchSize = 0; uint64_t imageCount = 0; }; diff --git a/modules/yolov3.cpp b/modules/yolov3.cpp index a518f16..8fcca66 100644 --- a/modules/yolov3.cpp +++ b/modules/yolov3.cpp @@ -26,7 +26,7 @@ SOFTWARE. YoloV3::YoloV3(const NetworkInfo& networkInfo, const InferParams& inferParams) : - Yolo( networkInfo, inferParams){} + Yolo(networkInfo, inferParams){} std::vector YoloV3::decodeTensor(const int imageIdx, const int imageH, diff --git a/modules/yolov5.h b/modules/yolov5.h index f7e0b41..db487de 100644 --- a/modules/yolov5.h +++ b/modules/yolov5.h @@ -22,10 +22,10 @@ class YoloV5 :public Yolo b.y1 = y - bh / 2; b.y2 = y + bh / 2; - b.x1 = clamp(b.x1, 0, netW); - b.x2 = clamp(b.x2, 0, netW); - b.y1 = clamp(b.y1, 0, netH); - b.y2 = clamp(b.y2, 0, netH); + b.x1 = clamp(b.x1, 0, static_cast(netW)); + b.x2 = clamp(b.x2, 0, static_cast(netW)); + b.y1 = clamp(b.y1, 0, static_cast(netH)); + b.y2 = clamp(b.y2, 0, static_cast(netH)); return b; } @@ -38,4 +38,4 @@ class YoloV5 :public Yolo const TensorInfo& tensor) override; }; -#endif \ No newline at end of file +#endif