Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

1、添加PPOCRv3对弯曲文字区域检测能力 2、修复rec前置处理属性设置错误 #2369

Open
wants to merge 5 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion c_api/fastdeploy_capi/vision/ocr/ppocr/model.cc
Original file line number Diff line number Diff line change
Expand Up @@ -288,7 +288,7 @@ FD_C_Bool FD_C_DBDetectorWrapperBatchPredict(
FD_C_DBDetectorWrapper* fd_c_dbdetector_wrapper, FD_C_OneDimMat imgs,
FD_C_ThreeDimArrayInt32* det_results) {
std::vector<cv::Mat> imgs_vec;
std::vector<std::vector<std::array<int, 8>>> det_results_out;
std::vector<std::vector<std::vector<std::array<int, 2>>>> det_results_out;
for (int i = 0; i < imgs.size; i++) {
imgs_vec.push_back(*(reinterpret_cast<cv::Mat*>(imgs.data[i])));
}
Expand Down
2 changes: 1 addition & 1 deletion fastdeploy/vision/common/processors/manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ namespace vision {
*/
class FASTDEPLOY_DECL ProcessorManager {
public:
~ProcessorManager();
virtual ~ProcessorManager();

/** \brief Use CUDA to boost the performance of processors
*
Expand Down
16 changes: 8 additions & 8 deletions fastdeploy/vision/common/result.cc
Original file line number Diff line number Diff line change
Expand Up @@ -695,11 +695,11 @@ std::string OCRResult::Str() {
std::string out;
for (int n = 0; n < boxes.size(); n++) {
out = out + "det boxes: [";
for (int i = 0; i < 4; i++) {
out = out + "[" + std::to_string(boxes[n][i * 2]) + "," +
std::to_string(boxes[n][i * 2 + 1]) + "]";
for (int i = 0; i < boxes[n].size(); i++) {
out = out + "[" + std::to_string(boxes[n][i][0]) + "," +
std::to_string(boxes[n][i][1]) + "]";

if (i != 3) {
if (i != boxes[n].size() - 1) {
out = out + ",";
}
}
Expand All @@ -720,8 +720,8 @@ std::string OCRResult::Str() {
for (int n = 0; n < boxes.size(); n++) {
out = out + "table boxes: [";
for (int i = 0; i < 4; i++) {
out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," +
std::to_string(table_boxes[n][i * 2 + 1]) + "]";
out = out + "[" + std::to_string(table_boxes[n][i][0]) + "," +
std::to_string(table_boxes[n][i][1]) + "]";

if (i != 3) {
out = out + ",";
Expand Down Expand Up @@ -778,8 +778,8 @@ std::string OCRResult::Str() {
for (int n = 0; n < table_boxes.size(); n++) {
out = out + "table boxes: [";
for (int i = 0; i < 4; i++) {
out = out + "[" + std::to_string(table_boxes[n][i * 2]) + "," +
std::to_string(table_boxes[n][i * 2 + 1]) + "]";
out = out + "[" + std::to_string(table_boxes[n][i][0]) + "," +
std::to_string(table_boxes[n][i][1]) + "]";

if (i != 3) {
out = out + ",";
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/vision/common/result.h
Original file line number Diff line number Diff line change
Expand Up @@ -223,15 +223,15 @@ struct FASTDEPLOY_DECL KeyPointDetectionResult : public BaseResult {
};

struct FASTDEPLOY_DECL OCRResult : public BaseResult {
std::vector<std::array<int, 8>> boxes;
std::vector<std::vector<std::array<int, 2>>> boxes;

std::vector<std::string> text;
std::vector<float> rec_scores;

std::vector<float> cls_scores;
std::vector<int32_t> cls_labels;

std::vector<std::array<int, 8>> table_boxes;
std::vector<std::vector<std::array<int, 2>>> table_boxes;
std::vector<std::string> table_structure;
std::string table_html;

Expand Down
8 changes: 4 additions & 4 deletions fastdeploy/vision/ocr/ppocr/dbdetector.cc
Original file line number Diff line number Diff line change
Expand Up @@ -63,8 +63,8 @@ std::unique_ptr<DBDetector> DBDetector::Clone() const {
}

bool DBDetector::Predict(const cv::Mat& img,
std::vector<std::array<int, 8>>* boxes_result) {
std::vector<std::vector<std::array<int, 8>>> det_results;
std::vector<std::vector<std::array<int, 2>>>* boxes_result) {
std::vector<std::vector<std::vector<std::array<int, 2>>>> det_results;
if (!BatchPredict({img}, &det_results)) {
return false;
}
Expand All @@ -81,7 +81,7 @@ bool DBDetector::Predict(const cv::Mat& img, vision::OCRResult* ocr_result) {

bool DBDetector::BatchPredict(const std::vector<cv::Mat>& images,
std::vector<vision::OCRResult>* ocr_results) {
std::vector<std::vector<std::array<int, 8>>> det_results;
std::vector<std::vector<std::vector<std::array<int, 2>>>> det_results;
if (!BatchPredict(images, &det_results)) {
return false;
}
Expand All @@ -94,7 +94,7 @@ bool DBDetector::BatchPredict(const std::vector<cv::Mat>& images,

bool DBDetector::BatchPredict(
const std::vector<cv::Mat>& images,
std::vector<std::vector<std::array<int, 8>>>* det_results) {
std::vector<std::vector<std::vector<std::array<int, 2>>>>* det_results) {
std::vector<FDMat> fd_images = WrapMat(images);
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) {
FDERROR << "Failed to preprocess input image." << std::endl;
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/vision/ocr/ppocr/dbdetector.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ class FASTDEPLOY_DECL DBDetector : public FastDeployModel {
* \return true if the prediction succeeded, otherwise false.
*/
virtual bool Predict(const cv::Mat& img,
std::vector<std::array<int, 8>>* boxes_result);
std::vector<std::vector<std::array<int, 2>>>* boxes_result);

/** \brief Predict the input image and get OCR detection model result.
*
Expand All @@ -77,7 +77,7 @@ class FASTDEPLOY_DECL DBDetector : public FastDeployModel {
* \return true if the prediction succeeded, otherwise false.
*/
virtual bool BatchPredict(const std::vector<cv::Mat>& images,
std::vector<std::vector<std::array<int, 8>>>* det_results);
std::vector<std::vector<std::vector<std::array<int, 2>>>>* det_results);

/** \brief BatchPredict the input image and get OCR detection model result.
*
Expand Down
21 changes: 13 additions & 8 deletions fastdeploy/vision/ocr/ppocr/det_postprocessor.cc
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ namespace ocr {
bool DBDetectorPostprocessor::SingleBatchPostprocessor(
const float* out_data, int n2, int n3,
const std::array<int, 4>& det_img_info,
std::vector<std::array<int, 8>>* boxes_result) {
std::vector<std::vector<std::array<int, 2>>>* boxes_result) {
int n = n2 * n3;

// prepare bitmap
Expand All @@ -47,22 +47,27 @@ bool DBDetectorPostprocessor::SingleBatchPostprocessor(
cv::dilate(bit_map, bit_map, dila_ele);
}

std::vector<std::vector<std::vector<int>>> boxes;
std::vector<std::vector<std::array<int, 2>>> boxes;

boxes = util_post_processor_.BoxesFromBitmap(
if (det_db_use_ploy_)
{
boxes = util_post_processor_.PloygonsFromBitmap(
pred_map, bit_map, det_db_box_thresh_, det_db_unclip_ratio_,
det_db_score_mode_);
} else {
boxes = util_post_processor_.BoxesFromBitmap(
pred_map, bit_map, det_db_box_thresh_, det_db_unclip_ratio_,
det_db_score_mode_);
}

boxes = util_post_processor_.FilterTagDetRes(boxes, det_img_info);

// boxes to boxes_result
for (int i = 0; i < boxes.size(); i++) {
std::array<int, 8> new_box;
std::vector<std::array<int, 2>> new_box;
int k = 0;
for (auto& vec : boxes[i]) {
for (auto& e : vec) {
new_box[k++] = e;
}
new_box.emplace_back(vec);
}
boxes_result->emplace_back(new_box);
}
Expand All @@ -72,7 +77,7 @@ bool DBDetectorPostprocessor::SingleBatchPostprocessor(

bool DBDetectorPostprocessor::Run(
const std::vector<FDTensor>& tensors,
std::vector<std::vector<std::array<int, 8>>>* results,
std::vector<std::vector<std::vector<std::array<int, 2>>>>* results,
const std::vector<std::array<int, 4>>& batch_det_img_info) {
// DBDetector have only 1 output tensor.
const FDTensor& tensor = tensors[0];
Expand Down
9 changes: 7 additions & 2 deletions fastdeploy/vision/ocr/ppocr/det_postprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class FASTDEPLOY_DECL DBDetectorPostprocessor {
* \return true if the postprocess succeeded, otherwise false
*/
bool Run(const std::vector<FDTensor>& tensors,
std::vector<std::vector<std::array<int, 8>>>* results,
std::vector<std::vector<std::vector<std::array<int, 2>>>>* results,
const std::vector<std::array<int, 4>>& batch_det_img_info);

/// Set det_db_thresh for the detection postprocess, default is 0.3
Expand Down Expand Up @@ -67,17 +67,22 @@ class FASTDEPLOY_DECL DBDetectorPostprocessor {
/// Get use_dilation of the detection postprocess
int GetUseDilation() const { return use_dilation_; }

/// Set det_db_use_ploy for the detection postprocess, default is false
void SetDetDBUsePloy(int det_db_use_ploy) { det_db_use_ploy_ = det_db_use_ploy; }
/// Get det_db_use_ploy of the detection postprocess
int GetDetDBUsePloy() const { return det_db_use_ploy_; }

private:
double det_db_thresh_ = 0.3;
double det_db_box_thresh_ = 0.6;
double det_db_unclip_ratio_ = 1.5;
std::string det_db_score_mode_ = "slow";
bool use_dilation_ = false;
bool det_db_use_ploy_ = false;
PostProcessor util_post_processor_;
bool SingleBatchPostprocessor(const float* out_data, int n2, int n3,
const std::array<int, 4>& det_img_info,
std::vector<std::array<int, 8>>* boxes_result);
std::vector<std::vector<std::array<int, 2>>>* boxes_result);
};

} // namespace ocr
Expand Down
14 changes: 8 additions & 6 deletions fastdeploy/vision/ocr/ppocr/ocrmodel_pybind.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@

namespace fastdeploy {
void BindPPOCRModel(pybind11::module& m) {
m.def("sort_boxes", [](std::vector<std::array<int, 8>>& boxes) {
m.def("sort_boxes", [](std::vector<std::vector<std::array<int, 2>>>& boxes) {
vision::ocr::SortBoxes(&boxes);
return boxes;
});
Expand Down Expand Up @@ -77,12 +77,14 @@ void BindPPOCRModel(pybind11::module& m) {
.def_property("use_dilation",
&vision::ocr::DBDetectorPostprocessor::GetUseDilation,
&vision::ocr::DBDetectorPostprocessor::SetUseDilation)

.def_property("det_db_use_ploy",
&vision::ocr::DBDetectorPostprocessor::GetDetDBUsePloy,
&vision::ocr::DBDetectorPostprocessor::SetDetDBUsePloy)
.def("run",
[](vision::ocr::DBDetectorPostprocessor& self,
std::vector<FDTensor>& inputs,
const std::vector<std::array<int, 4>>& batch_det_img_info) {
std::vector<std::vector<std::array<int, 8>>> results;
std::vector<std::vector<std::vector<std::array<int, 2>>>> results;

if (!self.Run(inputs, &results, batch_det_img_info)) {
throw std::runtime_error(
Expand All @@ -95,7 +97,7 @@ void BindPPOCRModel(pybind11::module& m) {
[](vision::ocr::DBDetectorPostprocessor& self,
std::vector<pybind11::array>& input_array,
const std::vector<std::array<int, 4>>& batch_det_img_info) {
std::vector<std::vector<std::array<int, 8>>> results;
std::vector<std::vector<std::vector<std::array<int, 2>>>> results;
std::vector<FDTensor> inputs;
PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
if (!self.Run(inputs, &results, batch_det_img_info)) {
Expand Down Expand Up @@ -355,7 +357,7 @@ void BindPPOCRModel(pybind11::module& m) {
[](vision::ocr::StructureV2TablePostprocessor& self,
std::vector<FDTensor>& inputs,
const std::vector<std::array<int, 4>>& batch_det_img_info) {
std::vector<std::vector<std::array<int, 8>>> boxes;
std::vector<std::vector<std::vector<std::array<int, 2>>>> boxes;
std::vector<std::vector<std::string>> structure_list;

if (!self.Run(inputs, &boxes, &structure_list,
Expand All @@ -372,7 +374,7 @@ void BindPPOCRModel(pybind11::module& m) {
const std::vector<std::array<int, 4>>& batch_det_img_info) {
std::vector<FDTensor> inputs;
PyArrayToTensorList(input_array, &inputs, /*share_buffer=*/true);
std::vector<std::vector<std::array<int, 8>>> boxes;
std::vector<std::vector<std::vector<std::array<int, 2>>>> boxes;
std::vector<std::vector<std::string>> structure_list;

if (!self.Run(inputs, &boxes, &structure_list,
Expand Down
10 changes: 5 additions & 5 deletions fastdeploy/vision/ocr/ppocr/ppocr_v2.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ int PPOCRv2::GetRecBatchSize() {
}

bool PPOCRv2::Initialized() const {

if (detector_ != nullptr && !detector_->Initialized()) {
return false;
}
Expand All @@ -76,7 +76,7 @@ bool PPOCRv2::Initialized() const {
if (recognizer_ != nullptr && !recognizer_->Initialized()) {
return false;
}
return true;
return true;
}

std::unique_ptr<PPOCRv2> PPOCRv2::Clone() const {
Expand Down Expand Up @@ -109,7 +109,7 @@ bool PPOCRv2::BatchPredict(const std::vector<cv::Mat>& images,
std::vector<fastdeploy::vision::OCRResult>* batch_result) {
batch_result->clear();
batch_result->resize(images.size());
std::vector<std::vector<std::array<int, 8>>> batch_boxes(images.size());
std::vector<std::vector<std::vector<std::array<int, 2>>>> batch_boxes(images.size());

if (!detector_->BatchPredict(images, &batch_boxes)) {
FDERROR << "There's error while detecting image in PPOCR." << std::endl;
Expand All @@ -120,11 +120,11 @@ bool PPOCRv2::BatchPredict(const std::vector<cv::Mat>& images,
vision::ocr::SortBoxes(&(batch_boxes[i_batch]));
(*batch_result)[i_batch].boxes = batch_boxes[i_batch];
}

for(int i_batch = 0; i_batch < images.size(); ++i_batch) {
fastdeploy::vision::OCRResult& ocr_result = (*batch_result)[i_batch];
// Get cropped images by detection result
const std::vector<std::array<int, 8>>& boxes = ocr_result.boxes;
const std::vector<std::vector<std::array<int, 2>>>& boxes = ocr_result.boxes;
const cv::Mat& img = images[i_batch];
std::vector<cv::Mat> image_list;
if (boxes.size() == 0) {
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/vision/ocr/ppocr/ppstructurev2_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ bool PPStructureV2Table::BatchPredict(
std::vector<fastdeploy::vision::OCRResult>* batch_result) {
batch_result->clear();
batch_result->resize(images.size());
std::vector<std::vector<std::array<int, 8>>> batch_boxes(images.size());
std::vector<std::vector<std::vector<std::array<int, 2>>>> batch_boxes(images.size());

if (!detector_->BatchPredict(images, &batch_boxes)) {
FDERROR << "There's error while detecting image in PPOCR." << std::endl;
Expand All @@ -98,7 +98,7 @@ bool PPStructureV2Table::BatchPredict(
for (int i_batch = 0; i_batch < images.size(); ++i_batch) {
fastdeploy::vision::OCRResult& ocr_result = (*batch_result)[i_batch];
// Get cropped images by detection result
const std::vector<std::array<int, 8>>& boxes = ocr_result.boxes;
const std::vector<std::vector<std::array<int, 2>>>& boxes = ocr_result.boxes;
const cv::Mat& img = images[i_batch];
std::vector<cv::Mat> image_list;
if (boxes.size() == 0) {
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/vision/ocr/ppocr/rec_preprocessor.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,9 @@ class FASTDEPLOY_DECL RecognizerPreprocessor : public ProcessorManager {
std::vector<int> GetRecImageShape() { return rec_image_shape_; }

/// This function will disable normalize in preprocessing step.
void DisableNormalize() { disable_permute_ = true; }
void DisableNormalize() { disable_normalize_ = true; }
/// This function will disable hwc2chw in preprocessing step.
void DisablePermute() { disable_normalize_ = true; }
void DisablePermute() { disable_permute_ = true; }

private:
void OcrRecognizerResizeImage(FDMat* mat, float max_wh_ratio,
Expand Down
8 changes: 4 additions & 4 deletions fastdeploy/vision/ocr/ppocr/structurev2_table.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,9 @@ std::unique_ptr<StructureV2Table> StructureV2Table::Clone() const {
}

bool StructureV2Table::Predict(const cv::Mat& img,
std::vector<std::array<int, 8>>* boxes_result,
std::vector<std::vector<std::array<int, 2>>>* boxes_result,
std::vector<std::string>* structure_result) {
std::vector<std::vector<std::array<int, 8>>> det_results;
std::vector<std::vector<std::vector<std::array<int, 2>>>> det_results;
std::vector<std::vector<std::string>> structure_results;
if (!BatchPredict({img}, &det_results, &structure_results)) {
return false;
Expand All @@ -89,7 +89,7 @@ bool StructureV2Table::Predict(const cv::Mat& img,
bool StructureV2Table::BatchPredict(
const std::vector<cv::Mat>& images,
std::vector<vision::OCRResult>* ocr_results) {
std::vector<std::vector<std::array<int, 8>>> det_results;
std::vector<std::vector<std::vector<std::array<int, 2>>>> det_results;
std::vector<std::vector<std::string>> structure_results;
if (!BatchPredict(images, &det_results, &structure_results)) {
return false;
Expand All @@ -104,7 +104,7 @@ bool StructureV2Table::BatchPredict(

bool StructureV2Table::BatchPredict(
const std::vector<cv::Mat>& images,
std::vector<std::vector<std::array<int, 8>>>* det_results,
std::vector<std::vector<std::vector<std::array<int, 2>>>>* det_results,
std::vector<std::vector<std::string>>* structure_results) {
std::vector<FDMat> fd_images = WrapMat(images);
if (!preprocessor_.Run(&fd_images, &reused_input_tensors_)) {
Expand Down
4 changes: 2 additions & 2 deletions fastdeploy/vision/ocr/ppocr/structurev2_table.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class FASTDEPLOY_DECL StructureV2Table : public FastDeployModel {
* \return true if the prediction succeeded, otherwise false.
*/
virtual bool Predict(const cv::Mat& img,
std::vector<std::array<int, 8>>* boxes_result,
std::vector<std::vector<std::array<int, 2>>>* boxes_result,
std::vector<std::string>* structure_result);

/** \brief Predict the input image and get OCR detection model result.
Expand All @@ -80,7 +80,7 @@ class FASTDEPLOY_DECL StructureV2Table : public FastDeployModel {
* \return true if the prediction succeeded, otherwise false.
*/
virtual bool BatchPredict(const std::vector<cv::Mat>& images,
std::vector<std::vector<std::array<int, 8>>>* det_results,
std::vector<std::vector<std::vector<std::array<int, 2>>>>* det_results,
std::vector<std::vector<std::string>>* structure_results);

/** \brief BatchPredict the input image and get OCR detection model result.
Expand Down
Loading
Loading