Skip to content

Commit

Permalink
yolov9 t s m
Browse files Browse the repository at this point in the history
  • Loading branch information
WuxinrongY committed Jun 21, 2024
1 parent cc72501 commit e3a46b0
Show file tree
Hide file tree
Showing 6 changed files with 830 additions and 229 deletions.
38 changes: 20 additions & 18 deletions yolov9/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,22 +7,16 @@ The Pytorch implementation is [WongKinYiu/yolov9](https://github.com/WongKinYiu/
<a href="https://github.com/WuxinrongY"><img src="https://github.com/avatars/u/53141838?v=4?s=48" width="40px;" alt=""/></a>

## Progress
- [x] YOLOv9-c:
- [x] FP32
- [x] FP16
- [x] INT8
- [x] YOLOv9-e:
- [x] FP32
- [x] FP16
- [x] INT8
- [x] GELAN-c:
- [x] FP32
- [x] FP16
- [x] INT8
- [x] GELAN-e:
- [x] FP32
- [x] FP16
- [x] INT8
- [x] YOLOv9-t
- [x] YOLOv9-t-convert(gelan)
- [x] YOLOv9-s
- [x] YOLOv9-s-convert(gelan)
- [x] YOLOv9-m
- [x] YOLOv9-m-convert(gelan)
- [x] YOLOv9-c
- [x] YOLOv9-c-convert(gelan)
- [x] YOLOv9-e
- [x] YOLOv9-e-convert(gelan)

## Requirements

Expand All @@ -35,8 +29,16 @@ The speed test is done on a desktop with R7-5700G CPU and RTX 4060Ti GPU. The in

| Framework | Model | FP32 | FP16 | INT8 |
| --- | --- | --- | --- | --- |
| pytorch | YOLOv9-c | - | 15.5ms | - |
| pytorch | YOLOv9-e | - | 19.7ms | - |
| tensorrt | YOLOv5-n | -ms | 0.58ms | -ms |
| tensorrt | YOLOv5-s | -ms | 0.90ms | -ms |
| tensorrt | YOLOv5-m | -ms | 1.9ms | -ms |
| tensorrt | YOLOv5-l | -ms | 2.8ms | -ms |
| tensorrt | YOLOv5-x | -ms | 5.1ms | -ms |
| tensorrt | YOLOv9-t-convert | -ms | 1.37ms | -ms |
| tensorrt | YOLOv9-s | -ms | 1.78ms | -ms |
| tensorrt | YOLOv9-s-convert | -ms | 1.78ms | -ms |
| tensorrt | YOLOv9-m | -ms | 3.1ms | -ms |
| tensorrt | YOLOv9-m-convert | -ms | 2.8ms | -ms |
| tensorrt | YOLOv9-c | 13.5ms | 4.6ms | 3.0ms |
| tensorrt | YOLOv9-e | 8.3ms | 3.2ms | 2.15ms |

Expand Down
47 changes: 31 additions & 16 deletions yolov9/demo.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,17 +19,32 @@ void serialize_engine(unsigned int max_batchsize, std::string& wts_name, std::st

// Create model to populate the network, then set the outputs and create an engine
IHostMemory* serialized_engine = nullptr;
if (sub_type == "e") {
serialized_engine = build_engine_yolov9_e(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
if (sub_type == "t") {
serialized_engine = build_engine_yolov9_t(max_batchsize, builder, config, DataType::kFLOAT, wts_name, false);
} else if (sub_type == "s") {
serialized_engine = build_engine_yolov9_s(max_batchsize, builder, config, DataType::kFLOAT, wts_name, false);
} else if (sub_type == "m") {
serialized_engine = build_engine_yolov9_m(max_batchsize, builder, config, DataType::kFLOAT, wts_name, false);
} else if (sub_type == "c") {
serialized_engine = build_engine_yolov9_c(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
} else if (sub_type == "ge") {
serialized_engine = build_engine_gelan_e(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
} else if (sub_type == "e") {
serialized_engine = build_engine_yolov9_e(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
}

else if (sub_type == "gt") {
serialized_engine = build_engine_yolov9_t(max_batchsize, builder, config, DataType::kFLOAT, wts_name, true);
} else if (sub_type == "gs") {
serialized_engine = build_engine_yolov9_s(max_batchsize, builder, config, DataType::kFLOAT, wts_name, true);
} else if (sub_type == "gm") {
serialized_engine = build_engine_yolov9_m(max_batchsize, builder, config, DataType::kFLOAT, wts_name, true);
} else if (sub_type == "gc") {
serialized_engine = build_engine_gelan_c(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
} else if (sub_type == "ge") {
serialized_engine = build_engine_gelan_e(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
} else {
return;
}

assert(serialized_engine != nullptr);

std::ofstream p(engine_name, std::ios::binary);
Expand Down Expand Up @@ -114,19 +129,19 @@ int main(int argc, char** argv) {
cudaSetDevice(kGpuId);

std::string wts_name = "";
std::string engine_name = "";
std::string img_dir = "";
std::string sub_type = "";
std::string engine_name = "../yolov9-m-converted.engine";
std::string img_dir = "../images";
std::string sub_type = "m";
// speed test or inference
// const int speed_test_iter = 1000;
const int speed_test_iter = 1;

if (!parse_args(argc, argv, wts_name, engine_name, img_dir, sub_type)) {
std::cerr << "Arguments not right!" << std::endl;
std::cerr << "./yolov9 -s [.wts] [.engine] [c/e/gc/ge] // serialize model to plan file" << std::endl;
std::cerr << "./yolov9 -d [.engine] ../samples // deserialize plan file and run inference" << std::endl;
return -1;
}
const int speed_test_iter = 1000;
// const int speed_test_iter = 1;

// if (!parse_args(argc, argv, wts_name, engine_name, img_dir, sub_type)) {
// std::cerr << "Arguments not right!" << std::endl;
// std::cerr << "./yolov9 -s [.wts] [.engine] [s/m/c/e/gt/gs/gm/gc/ge] // serialize model to plan file" << std::endl;
// std::cerr << "./yolov9 -d [.engine] ../samples // deserialize plan file and run inference" << std::endl;
// return -1;
// }

// Create a model using the API directly and serialize it to a file
if (!wts_name.empty()) {
Expand Down
4 changes: 4 additions & 0 deletions yolov9/include/block.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,14 @@ std::vector<std::vector<float>> getAnchors(std::map<std::string, Weights>& weigh
// ----------------------------------------------------------------
nvinfer1::ILayer* convBnSiLU(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights>& weightMap,
nvinfer1::ITensor& input, int ch, int k, int s, int p, std::string lname, int g = 1);
// Adds an ELAN1 block to `network`: a cv1 conv on `input` (c3 channels), two chained convs cv2/cv3 (c4 channels
// each) fed from the second half of cv1's output, and a final cv4 conv (c2 channels) over the concatenation of
// both halves with the cv2 and cv3 outputs. Sub-layer weights are looked up under `lname` + ".cv1" ... ".cv4".
// `c1` is accepted but not read by the implementation. Returns the cv4 layer.
ILayer* ELAN1(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2,
int c3, int c4, std::string lname);
ILayer* RepNCSPELAN4(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1,
int c2, int c3, int c4, int c5, std::string lname);
ILayer* ADown(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c2,
std::string lname);
// Adds an AConv block to `network`: a 2x2 average pooling (stride 1, no padding) on `input`, followed by a
// convBnSiLU with kernel 3, stride 2, padding 1 producing `c2` channels. The conv's weights are looked up under
// `lname` + ".cv1". Returns the conv layer.
ILayer* AConv(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c2,
std::string lname);
std::vector<ILayer*> CBLinear(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input,
std::vector<int> c2s, int k, int s, int p, int g, std::string lname);
ILayer* CBFuse(INetworkDefinition* network, std::vector<std::vector<ILayer*>> input, std::vector<int> idx,
Expand Down
24 changes: 20 additions & 4 deletions yolov9/include/model.h
Original file line number Diff line number Diff line change
Expand Up @@ -2,16 +2,32 @@

#include <NvInfer.h>
#include <string>
nvinfer1::IHostMemory* build_engine_yolov9_e(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
// yolov9
nvinfer1::IHostMemory* build_engine_yolov9_t(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
std::string& wts_name);
std::string& wts_name, bool isConvert = false);
nvinfer1::IHostMemory* build_engine_yolov9_s(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
std::string& wts_name, bool isConvert = false);
nvinfer1::IHostMemory* build_engine_yolov9_m(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
std::string& wts_name, bool isConvert = false);
nvinfer1::IHostMemory* build_engine_yolov9_c(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
std::string& wts_name);

nvinfer1::IHostMemory* build_engine_gelan_e(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
nvinfer1::IHostMemory* build_engine_yolov9_e(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
std::string& wts_name);
// gelan
nvinfer1::IHostMemory* build_engine_gelan_t(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
std::string& wts_name);
nvinfer1::IHostMemory* build_engine_gelan_m(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
std::string& wts_name);
nvinfer1::IHostMemory* build_engine_gelan_c(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
std::string& wts_name);
nvinfer1::IHostMemory* build_engine_gelan_e(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
std::string& wts_name);
35 changes: 34 additions & 1 deletion yolov9/src/block.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,29 @@ ILayer* RepNCSP(INetworkDefinition* network, std::map<std::string, Weights>& wei
auto cv3 = convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname + ".cv3", 1);
return cv3;
}

// Builds an ELAN1 block.
//
// Pipeline: cv1 (1x1 conv, c3 channels) -> split into two halves along the first dimension ->
// cv2 (3x3 conv, c4 channels) on the second half -> cv3 (3x3 conv, c4 channels) on cv2 ->
// concatenate {first half, second half, cv2, cv3} -> cv4 (1x1 conv, c2 channels).
//
//   network   - network definition the layers are added to
//   weightMap - weights handed through to convBnSiLU
//   input     - tensor feeding cv1
//   c1        - not read by this function
//   c2        - channel count passed to cv4
//   c3        - channel count passed to cv1
//   c4        - channel count passed to cv2 and cv3
//   lname     - name prefix; sub-layers use lname + ".cv1" ... ".cv4"
//
// Returns the layer produced for cv4.
ILayer* ELAN1(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2,
              int c3, int c4, std::string lname) {
    ILayer* stem = convBnSiLU(network, weightMap, input, c3, 1, 1, 0, lname + ".cv1", 1);
    ITensor* stemOut = stem->getOutput(0);

    // Split cv1's output into two equal parts (the chunk(2, 1) step).
    // NOTE(review): halving d[0] presumes the first dimension is the channel axis — confirm.
    const nvinfer1::Dims stemDims = stemOut->getDimensions();
    const auto halfLen = stemDims.d[0] / 2;
    const nvinfer1::Dims3 halfExtent{halfLen, stemDims.d[1], stemDims.d[2]};
    const nvinfer1::Dims3 unitStride{1, 1, 1};

    nvinfer1::ISliceLayer* firstHalf =
            network->addSlice(*stemOut, nvinfer1::Dims3{0, 0, 0}, halfExtent, unitStride);
    nvinfer1::ISliceLayer* secondHalf =
            network->addSlice(*stemOut, nvinfer1::Dims3{halfLen, 0, 0}, halfExtent, unitStride);

    // Two chained 3x3 convs; only the second half goes through them.
    ILayer* mid = convBnSiLU(network, weightMap, *secondHalf->getOutput(0), c4, 3, 1, 1, lname + ".cv2", 1);
    ILayer* deep = convBnSiLU(network, weightMap, *mid->getOutput(0), c4, 3, 1, 1, lname + ".cv3", 1);

    // Merge both halves with the two conv outputs, then fuse with the final 1x1 conv.
    ITensor* branches[] = {firstHalf->getOutput(0), secondHalf->getOutput(0), mid->getOutput(0),
                           deep->getOutput(0)};
    auto merged = network->addConcatenation(branches, 4);
    return convBnSiLU(network, weightMap, *merged->getOutput(0), c2, 1, 1, 0, lname + ".cv4", 1);
}

ILayer* RepNCSPELAN4(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1,
int c2, int c3, int c4, int c5, std::string lname) {

Expand All @@ -230,6 +253,14 @@ ILayer* RepNCSPELAN4(INetworkDefinition* network, std::map<std::string, Weights>
return cv4;
}

// Builds an AConv block: a 2x2 average pooling with stride 1 and zero padding, followed by a
// convBnSiLU with kernel 3, stride 2, padding 1.
//
//   network   - network definition the layers are added to
//   weightMap - weights handed through to convBnSiLU
//   input     - tensor to be pooled
//   c2        - channel count passed to the conv
//   lname     - name prefix; the conv uses lname + ".cv1"
//
// Returns the layer produced for the conv.
ILayer* AConv(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c2,
              std::string lname) {
    const DimsHW window{2, 2};
    auto avgPool = network->addPoolingNd(input, PoolingType::kAVERAGE, window);
    avgPool->setStrideNd(DimsHW{1, 1});
    avgPool->setPaddingNd(DimsHW{0, 0});

    ITensor* pooled = avgPool->getOutput(0);
    return convBnSiLU(network, weightMap, *pooled, c2, 3, 2, 1, lname + ".cv1", 1);
}
ILayer* ADown(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c2,
std::string lname) {
int c_ = c2 / 2;
Expand Down Expand Up @@ -426,7 +457,9 @@ std::vector<IConcatenationLayer*> DualDDetect(INetworkDefinition* network, std::
std::vector<IConcatenationLayer*> DDetect(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
std::vector<ILayer*> dets, int cls, std::vector<int> ch, std::string lname) {
int c2 = std::max(int(ch[0] / 4), int(16 * 4));
int c3 = std::max(ch[0], std::min(cls * 2, 128));
// max((ch[0], min((self.nc * 2, 128))))
// int c3 = std::max(ch[0], std::min(cls * 2, 128));
int c3 = std::max(ch[0], std::min(cls, 128));
int reg_max = 16;

std::vector<ILayer*> bboxlayers;
Expand Down
Loading

0 comments on commit e3a46b0

Please sign in to comment.