yolov9 t s m

wang-xinyu · Jun 21, 2024 · e3a46b0 · e3a46b0
1 parent cc72501
commit e3a46b0
Show file tree

Hide file tree

Showing 6 changed files with 830 additions and 229 deletions.
diff --git a/yolov9/README.md b/yolov9/README.md
@@ -7,22 +7,16 @@ The Pytorch implementation is [WongKinYiu/yolov9](https://github.com/WongKinYiu/
 <a href="https://github.com/WuxinrongY"><img src="https://github.com/avatars/u/53141838?v=4?s=48" width="40px;" alt=""/></a>
 
 ## Progress
-- [x] YOLOv9-c:
-  - [x] FP32
-  - [x] FP16
-  - [x] INT8
-- [x] YOLOv9-e:
-  - [x] FP32
-  - [x] FP16
-  - [x] INT8
-- [x] GELAN-c:
-  - [x] FP32
-  - [x] FP16
-  - [x] INT8
-- [x] GELAN-e:
-  - [x] FP32
-  - [x] FP16
-  - [x] INT8
+- [x] YOLOv9-t
+- [x] YOLOv9-t-convert(gelan)
+- [x] YOLOv9-s
+- [x] YOLOv9-s-convert(gelan)
+- [x] YOLOv9-m
+- [x] YOLOv9-m-convert(gelan)
+- [x] YOLOv9-c
+- [x] YOLOv9-c-convert(gelan)
+- [x] YOLOv9-e
+- [x] YOLOv9-e-convert(gelan)
 
 ## Requirements
 
@@ -35,8 +29,16 @@ The speed test is done on a desktop with R7-5700G CPU and RTX 4060Ti GPU. The in
 
 | frame  | Model | FP32 | FP16 | INT8 |
 | --- | --- | --- | --- | --- |
-| pytorch | YOLOv9-c | - | 15.5ms | - |
-| pytorch | YOLOv9-e | - | 19.7ms | - |
+| tensorrt | YOLOv5-n | -ms | 0.58ms | -ms |
+| tensorrt | YOLOv5-s | -ms | 0.90ms | -ms |
+| tensorrt | YOLOv5-m | -ms | 1.9ms | -ms |
+| tensorrt | YOLOv5-l | -ms | 2.8ms | -ms |
+| tensorrt | YOLOv5-x | -ms | 5.1ms | -ms |
+| tensorrt | YOLOv9-t-convert | -ms | 1.37ms | -ms |
+| tensorrt | YOLOv9-s | -ms | 1.78ms | -ms |
+| tensorrt | YOLOv9-s-convert | -ms | 1.78ms | -ms |
+| tensorrt | YOLOv9-m | -ms | 3.1ms | -ms |
+| tensorrt | YOLOv9-m-convert | -ms | 2.8ms | -ms |
 | tensorrt | YOLOv9-c | 13.5ms | 4.6ms | 3.0ms |
 | tensorrt | YOLOv9-e | 8.3ms | 3.2ms | 2.15ms |
 

diff --git a/yolov9/demo.cpp b/yolov9/demo.cpp
@@ -19,17 +19,32 @@ void serialize_engine(unsigned int max_batchsize, std::string& wts_name, std::st
 
     // Create model to populate the network, then set the outputs and create an engine
     IHostMemory* serialized_engine = nullptr;
-    if (sub_type == "e") {
-        serialized_engine = build_engine_yolov9_e(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
+    if (sub_type == "t") {
+        serialized_engine = build_engine_yolov9_t(max_batchsize, builder, config, DataType::kFLOAT, wts_name, false);
+    } else if (sub_type == "s") {
+        serialized_engine = build_engine_yolov9_s(max_batchsize, builder, config, DataType::kFLOAT, wts_name, false);
+    } else if (sub_type == "m") {
+        serialized_engine = build_engine_yolov9_m(max_batchsize, builder, config, DataType::kFLOAT, wts_name, false);
     } else if (sub_type == "c") {
         serialized_engine = build_engine_yolov9_c(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
-    } else if (sub_type == "ge") {
-        serialized_engine = build_engine_gelan_e(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
+    } else if (sub_type == "e") {
+        serialized_engine = build_engine_yolov9_e(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
+    }
+
+    else if (sub_type == "gt") {
+        serialized_engine = build_engine_yolov9_t(max_batchsize, builder, config, DataType::kFLOAT, wts_name, true);
+    } else if (sub_type == "gs") {
+        serialized_engine = build_engine_yolov9_s(max_batchsize, builder, config, DataType::kFLOAT, wts_name, true);
+    } else if (sub_type == "gm") {
+        serialized_engine = build_engine_yolov9_m(max_batchsize, builder, config, DataType::kFLOAT, wts_name, true);
     } else if (sub_type == "gc") {
         serialized_engine = build_engine_gelan_c(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
+    } else if (sub_type == "ge") {
+        serialized_engine = build_engine_gelan_e(max_batchsize, builder, config, DataType::kFLOAT, wts_name);
     } else {
         return;
     }
+
     assert(serialized_engine != nullptr);
 
     std::ofstream p(engine_name, std::ios::binary);
@@ -114,19 +129,19 @@ int main(int argc, char** argv) {
     cudaSetDevice(kGpuId);
 
     std::string wts_name = "";
-    std::string engine_name = "";
-    std::string img_dir = "";
-    std::string sub_type = "";
+    std::string engine_name = "../yolov9-m-converted.engine";
+    std::string img_dir = "../images";
+    std::string sub_type = "m";
     // speed test or inference
-    // const int speed_test_iter = 1000;
-    const int speed_test_iter = 1;
-
-    if (!parse_args(argc, argv, wts_name, engine_name, img_dir, sub_type)) {
-        std::cerr << "Arguments not right!" << std::endl;
-        std::cerr << "./yolov9 -s [.wts] [.engine] [c/e/gc/ge]  // serialize model to plan file" << std::endl;
-        std::cerr << "./yolov9 -d [.engine] ../samples  // deserialize plan file and run inference" << std::endl;
-        return -1;
-    }
+    const int speed_test_iter = 1000;
+    // const int speed_test_iter = 1;
+
+    // if (!parse_args(argc, argv, wts_name, engine_name, img_dir, sub_type)) {
+    //     std::cerr << "Arguments not right!" << std::endl;
+    //     std::cerr << "./yolov9 -s [.wts] [.engine] [t/s/m/c/e/gt/gs/gm/gc/ge]  // serialize model to plan file" << std::endl;
+    //     std::cerr << "./yolov9 -d [.engine] ../samples  // deserialize plan file and run inference" << std::endl;
+    //     return -1;
+    // }
 
     // Create a model using the API directly and serialize it to a file
     if (!wts_name.empty()) {

diff --git a/yolov9/include/block.h b/yolov9/include/block.h
@@ -22,10 +22,14 @@ std::vector<std::vector<float>> getAnchors(std::map<std::string, Weights>& weigh
 // ----------------------------------------------------------------
 nvinfer1::ILayer* convBnSiLU(nvinfer1::INetworkDefinition* network, std::map<std::string, nvinfer1::Weights>& weightMap,
                              nvinfer1::ITensor& input, int ch, int k, int s, int p, std::string lname, int g = 1);
+ILayer* ELAN1(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2,
+              int c3, int c4, std::string lname);
 ILayer* RepNCSPELAN4(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1,
                      int c2, int c3, int c4, int c5, std::string lname);
 ILayer* ADown(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c2,
               std::string lname);
+ILayer* AConv(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c2,
+              std::string lname);
 std::vector<ILayer*> CBLinear(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input,
                               std::vector<int> c2s, int k, int s, int p, int g, std::string lname);
 ILayer* CBFuse(INetworkDefinition* network, std::vector<std::vector<ILayer*>> input, std::vector<int> idx,

diff --git a/yolov9/include/model.h b/yolov9/include/model.h
@@ -2,16 +2,32 @@
 
 #include <NvInfer.h>
 #include <string>
-nvinfer1::IHostMemory* build_engine_yolov9_e(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
+// yolov9
+nvinfer1::IHostMemory* build_engine_yolov9_t(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
                                              nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
-                                             std::string& wts_name);
+                                             std::string& wts_name, bool isConvert = false);
+nvinfer1::IHostMemory* build_engine_yolov9_s(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
+                                             nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
+                                             std::string& wts_name, bool isConvert = false);
+nvinfer1::IHostMemory* build_engine_yolov9_m(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
+                                             nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
+                                             std::string& wts_name, bool isConvert = false);
 nvinfer1::IHostMemory* build_engine_yolov9_c(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
                                              nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
                                              std::string& wts_name);
-
-nvinfer1::IHostMemory* build_engine_gelan_e(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
+nvinfer1::IHostMemory* build_engine_yolov9_e(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
+                                             nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
+                                             std::string& wts_name);
+// gelan
+nvinfer1::IHostMemory* build_engine_gelan_t(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
+                                            nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
+                                            std::string& wts_name);
+nvinfer1::IHostMemory* build_engine_gelan_m(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
                                             nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
                                             std::string& wts_name);
 nvinfer1::IHostMemory* build_engine_gelan_c(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
                                             nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
                                             std::string& wts_name);
+nvinfer1::IHostMemory* build_engine_gelan_e(unsigned int maxBatchSize, nvinfer1::IBuilder* builder,
+                                            nvinfer1::IBuilderConfig* config, nvinfer1::DataType dt,
+                                            std::string& wts_name);
diff --git a/yolov9/src/block.cpp b/yolov9/src/block.cpp
@@ -204,6 +204,29 @@ ILayer* RepNCSP(INetworkDefinition* network, std::map<std::string, Weights>& wei
     auto cv3 = convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname + ".cv3", 1);
     return cv3;
 }
+// ELAN1: 1x1 convBnSiLU (ch = c3) -> split in two along dim 0 -> two chained 3x3 convBnSiLU (ch = c4) on the second half -> concat of all four tensors -> 1x1 convBnSiLU (ch = c2). Returns the last layer; c1 is not used in this body.
+ILayer* ELAN1(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1, int c2,
+              int c3, int c4, std::string lname) {
+    auto cv1 = convBnSiLU(network, weightMap, input, c3, 1, 1, 0, lname + ".cv1", 1);
+    // Split the output of cv1 into two parts, i.e. chunk(2, 1)
+    // NOTE(review): slicing on d.d[0] presumably assumes a 3-D CHW tensor (no batch axis) whose dim 0 is even — confirm.
+    nvinfer1::Dims d = cv1->getOutput(0)->getDimensions();
+    nvinfer1::ISliceLayer* split1 =
+            network->addSlice(*cv1->getOutput(0), nvinfer1::Dims3{0, 0, 0}, nvinfer1::Dims3{d.d[0] / 2, d.d[1], d.d[2]},
+                              nvinfer1::Dims3{1, 1, 1});  // first half: starts at index 0 of dim 0, unit stride
+    nvinfer1::ISliceLayer* split2 =
+            network->addSlice(*cv1->getOutput(0), nvinfer1::Dims3{d.d[0] / 2, 0, 0},
+                              nvinfer1::Dims3{d.d[0] / 2, d.d[1], d.d[2]}, nvinfer1::Dims3{1, 1, 1});  // second half: starts at d.d[0] / 2
+    auto cv2 = convBnSiLU(network, weightMap, *split2->getOutput(0), c4, 3, 1, 1, lname + ".cv2", 1);  // only the second half is convolved
+    // cv3 is chained on cv2's output, not on the split
+    auto cv3 = convBnSiLU(network, weightMap, *cv2->getOutput(0), c4, 3, 1, 1, lname + ".cv3", 1);
+    // Concatenate both halves with the outputs of cv2 and cv3, then apply the final 1x1 convBnSiLU (ch = c2).
+    ITensor* inputTensors[] = {split1->getOutput(0), split2->getOutput(0), cv2->getOutput(0), cv3->getOutput(0)};
+    auto cat = network->addConcatenation(inputTensors, 4);
+    auto cv4 = convBnSiLU(network, weightMap, *cat->getOutput(0), c2, 1, 1, 0, lname + ".cv4", 1);
+    return cv4;
+}
+
 ILayer* RepNCSPELAN4(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c1,
                      int c2, int c3, int c4, int c5, std::string lname) {
 
@@ -230,6 +253,14 @@ ILayer* RepNCSPELAN4(INetworkDefinition* network, std::map<std::string, Weights>
     return cv4;
 }
 
+ILayer* AConv(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c2,
+              std::string lname) {  // AConv: 2x2 average pooling (stride 1, no padding) followed by one 3x3, stride-2 convBnSiLU with ch = c2.
+    auto pool = network->addPoolingNd(input, PoolingType::kAVERAGE, DimsHW{2, 2});  // 2x2 averaging window
+    pool->setStrideNd(DimsHW{1, 1});  // stride 1: the pooling step itself does not perform the stride-2 step
+    pool->setPaddingNd(DimsHW{0, 0});  // no padding on the pooling window
+    auto cv1 = convBnSiLU(network, weightMap, *pool->getOutput(0), c2, 3, 2, 1, lname + ".cv1", 1);  // k = 3, s = 2, p = 1, g = 1
+    return cv1;  // the convolution layer is the block's output
+}
 ILayer* ADown(INetworkDefinition* network, std::map<std::string, Weights>& weightMap, ITensor& input, int c2,
               std::string lname) {
     int c_ = c2 / 2;
@@ -426,7 +457,9 @@ std::vector<IConcatenationLayer*> DualDDetect(INetworkDefinition* network, std::
 std::vector<IConcatenationLayer*> DDetect(INetworkDefinition* network, std::map<std::string, Weights>& weightMap,
                                           std::vector<ILayer*> dets, int cls, std::vector<int> ch, std::string lname) {
     int c2 = std::max(int(ch[0] / 4), int(16 * 4));
-    int c3 = std::max(ch[0], std::min(cls * 2, 128));
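+    // Python reference: max((ch[0], min((self.nc * 2, 128)))). NOTE(review): the active line below uses cls, not cls * 2 — confirm which is intended.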
+    // int c3 = std::max(ch[0], std::min(cls * 2, 128));
+    int c3 = std::max(ch[0], std::min(cls, 128));
     int reg_max = 16;
 
     std::vector<ILayer*> bboxlayers;