From 364fcfd7dba53f46edd4f04c037a039c0a287972 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Mon, 22 Jun 2020 14:27:17 -0700 Subject: [PATCH] PANet update --- README.md | 15 +++++++------ models/yolov3-spp.yaml | 3 +-- models/yolov5l.yaml | 49 ++++++++++++++++++++++++------------------ models/yolov5m.yaml | 49 ++++++++++++++++++++++++------------------ models/yolov5s.yaml | 49 ++++++++++++++++++++++++------------------ models/yolov5x.yaml | 49 ++++++++++++++++++++++++------------------ utils/utils.py | 8 ++++--- 7 files changed, 126 insertions(+), 96 deletions(-) diff --git a/README.md b/README.md index 591a8588a5fe..df49f6755ba2 100755 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This repository represents Ultralytics open-source research into future object detection methods, and incorporates our lessons learned and best practices evolved over training thousands of models on custom client datasets with our previous YOLO repository https://github.com/ultralytics/yolov3. **All code and models are under active development, and are subject to modification or deletion without notice.** Use at your own risk. -** GPU Speed measures end-to-end time per image averaged over 5000 COCO val2017 images using a V100 GPU with batch size 32, and includes image preprocessing, PyTorch FP32 inference, postprocessing and NMS. +** GPU Speed measures end-to-end time per image averaged over 5000 COCO val2017 images using a V100 GPU with batch size 8, and includes image preprocessing, PyTorch FP16 inference, postprocessing and NMS. - **June 19, 2020**: [FP16](https://pytorch.org/docs/stable/nn.html#torch.nn.Module.half) as new default for smaller checkpoints and faster inference. Comparison in [d4c6674](https://github.com/ultralytics/yolov5/commit/d4c6674c98e19df4c40e33a777610a18d1961145). - **June 9, 2020**: [CSP](https://github.com/WongKinYiu/CrossStagePartialNetworks) updates to all YOLOv5 models. New models are faster, smaller and more accurate. Credit to @WongKinYiu for his excellent work with CSP. @@ -14,13 +14,14 @@ This repository represents Ultralytics open-source research into future object d ## Pretrained Checkpoints -| Model | APval | APtest | AP50 | SpeedGPU | FPSGPU || params | FLOPs | +| Model | APval | APtest | AP50 | SpeedGPU | FPSGPU || params | FLOPS | |---------- |------ |------ |------ | -------- | ------| ------ |------ | :------: | -| YOLOv5-s ([ckpt](https://drive.google.com/open?id=1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J)) | 35.5 | 35.5 | 55.0 | **2.1ms** | **476** || 7.1M | 12.6B -| YOLOv5-m ([ckpt](https://drive.google.com/open?id=1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J)) | 42.7 | 42.7 | 62.4 | 3.2ms | 312 || 22.0M | 39.0B -| YOLOv5-l ([ckpt](https://drive.google.com/open?id=1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J)) | 45.7 | 45.9 | 65.1 | 4.1ms | 243 || 50.3M | 89.0B -| YOLOv5-x ([ckpt](https://drive.google.com/open?id=1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J)) | **47.2** | **47.3** | **66.6** | 6.5ms | 153 || 95.9M | 170.3B -| YOLOv3-SPP ([ckpt](https://drive.google.com/open?id=1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J)) | 45.6 | 45.5 | 65.2 | 4.8ms | 208 || 63.0M | 118.0B +| [YOLOv5s](https://drive.google.com/open?id=1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J) | 36.5 | 36.5 | 55.6 | **2.2ms** | **455** || 7.5M | 13.2B +| [YOLOv5m](https://drive.google.com/open?id=1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J) | 43.4 | 43.4 | 62.4 | 3.0ms | 333 || 21.8M | 39.4B +| [YOLOv5l](https://drive.google.com/open?id=1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J) | 46.6 | 46.7 | 65.4 | 3.9ms | 256 || 47.8M | 88.1B +| [YOLOv5x](https://drive.google.com/open?id=1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J) | **48.2** | **48.3** | **66.9** | 6.1ms | 164 || 89.0M | 166.4B +| [YOLOv3-SPP](https://drive.google.com/open?id=1Drs_Aiu7xx6S-ix95f9kNsA6ueKRpN2J) | 45.6 | 45.5 | 65.2 | 4.5ms | 222 || 63.0M | 118.0B + ** APtest denotes COCO [test-dev2017](http://cocodataset.org/#upload) server results, all other AP results in the table denote val2017 accuracy. ** All AP numbers are for single-model single-scale without ensemble or test-time augmentation. Reproduce by `python test.py --img 736 --conf 0.001` diff --git a/models/yolov3-spp.yaml b/models/yolov3-spp.yaml index 3dad00966868..6508dc43586f 100644 --- a/models/yolov3-spp.yaml +++ b/models/yolov3-spp.yaml @@ -25,8 +25,7 @@ backbone: [-1, 4, Bottleneck, [1024]], # 10 ] -# yolov3-spp head -# na = len(anchors[0]) +# YOLOv3-SPP head head: [[-1, 1, Bottleneck, [1024, False]], # 11 [-1, 1, SPP, [512, [5, 9, 13]]], diff --git a/models/yolov5l.yaml b/models/yolov5l.yaml index f270fdc0258a..959d4bddcda3 100644 --- a/models/yolov5l.yaml +++ b/models/yolov5l.yaml @@ -5,41 +5,48 @@ width_multiple: 1.0 # layer channel multiple # anchors anchors: - - [10,13, 16,30, 33,23] # P3/8 - - [30,61, 62,45, 59,119] # P4/16 - [116,90, 156,198, 373,326] # P5/32 + - [30,61, 62,45, 59,119] # P4/16 + - [10,13, 16,30, 33,23] # P3/8 -# yolov5 backbone +# YOLOv5 backbone backbone: # [from, number, module, args] - [[-1, 1, Focus, [64, 3]], # 1-P1/2 - [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 - [-1, 3, Bottleneck, [128]], - [-1, 1, Conv, [256, 3, 2]], # 4-P3/8 + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, BottleneckCSP, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 [-1, 9, BottleneckCSP, [256]], - [-1, 1, Conv, [512, 3, 2]], # 6-P4/16 + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 [-1, 9, BottleneckCSP, [512]], - [-1, 1, Conv, [1024, 3, 2]], # 8-P5/32 + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 [-1, 1, SPP, [1024, [5, 9, 13]]], - [-1, 6, BottleneckCSP, [1024]], # 10 ] -# yolov5 head +# YOLOv5 head head: - [[-1, 3, BottleneckCSP, [1024, False]], # 11 - [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 12 (P5/32-large) + [[-1, 3, BottleneckCSP, [1024, False]], # 9 - [-2, 1, nn.Upsample, [None, 2, 'nearest']], - [[-1, 6], 1, Concat, [1]], # cat backbone P4 [-1, 1, Conv, [512, 1, 1]], - [-1, 3, BottleneckCSP, [512, False]], - [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 17 (P4/16-medium) + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, BottleneckCSP, [512, False]], # 13 - [-2, 1, nn.Upsample, [None, 2, 'nearest']], - [[-1, 4], 1, Concat, [1]], # cat backbone P3 [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 [-1, 3, BottleneckCSP, [256, False]], - [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 22 (P3/8-small) + [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 18 (P3/8-small) + + [-2, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, BottleneckCSP, [512, False]], + [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 22 (P4/16-medium) + + [-2, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, BottleneckCSP, [1024, False]], + [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 26 (P5/32-large) - [[], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + [[], 1, Detect, [nc, anchors]], # Detect(P5, P4, P3) ] diff --git a/models/yolov5m.yaml b/models/yolov5m.yaml index 849800458cfa..60037c261d22 100644 --- a/models/yolov5m.yaml +++ b/models/yolov5m.yaml @@ -5,41 +5,48 @@ width_multiple: 0.75 # layer channel multiple # anchors anchors: - - [10,13, 16,30, 33,23] # P3/8 - - [30,61, 62,45, 59,119] # P4/16 - [116,90, 156,198, 373,326] # P5/32 + - [30,61, 62,45, 59,119] # P4/16 + - [10,13, 16,30, 33,23] # P3/8 -# yolov5 backbone +# YOLOv5 backbone backbone: # [from, number, module, args] - [[-1, 1, Focus, [64, 3]], # 1-P1/2 - [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 - [-1, 3, Bottleneck, [128]], - [-1, 1, Conv, [256, 3, 2]], # 4-P3/8 + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, BottleneckCSP, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 [-1, 9, BottleneckCSP, [256]], - [-1, 1, Conv, [512, 3, 2]], # 6-P4/16 + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 [-1, 9, BottleneckCSP, [512]], - [-1, 1, Conv, [1024, 3, 2]], # 8-P5/32 + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 [-1, 1, SPP, [1024, [5, 9, 13]]], - [-1, 6, BottleneckCSP, [1024]], # 10 ] -# yolov5 head +# YOLOv5 head head: - [[-1, 3, BottleneckCSP, [1024, False]], # 11 - [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 12 (P5/32-large) + [[-1, 3, BottleneckCSP, [1024, False]], # 9 - [-2, 1, nn.Upsample, [None, 2, 'nearest']], - [[-1, 6], 1, Concat, [1]], # cat backbone P4 [-1, 1, Conv, [512, 1, 1]], - [-1, 3, BottleneckCSP, [512, False]], - [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 17 (P4/16-medium) + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, BottleneckCSP, [512, False]], # 13 - [-2, 1, nn.Upsample, [None, 2, 'nearest']], - [[-1, 4], 1, Concat, [1]], # cat backbone P3 [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 [-1, 3, BottleneckCSP, [256, False]], - [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 22 (P3/8-small) + [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 18 (P3/8-small) + + [-2, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, BottleneckCSP, [512, False]], + [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 22 (P4/16-medium) + + [-2, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, BottleneckCSP, [1024, False]], + [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 26 (P5/32-large) - [[], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + [[], 1, Detect, [nc, anchors]], # Detect(P5, P4, P3) ] diff --git a/models/yolov5s.yaml b/models/yolov5s.yaml index 482d1ddea48d..1eaef976dc16 100644 --- a/models/yolov5s.yaml +++ b/models/yolov5s.yaml @@ -5,41 +5,48 @@ width_multiple: 0.50 # layer channel multiple # anchors anchors: - - [10,13, 16,30, 33,23] # P3/8 - - [30,61, 62,45, 59,119] # P4/16 - [116,90, 156,198, 373,326] # P5/32 + - [30,61, 62,45, 59,119] # P4/16 + - [10,13, 16,30, 33,23] # P3/8 -# yolov5 backbone +# YOLOv5 backbone backbone: # [from, number, module, args] - [[-1, 1, Focus, [64, 3]], # 1-P1/2 - [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 - [-1, 3, Bottleneck, [128]], - [-1, 1, Conv, [256, 3, 2]], # 4-P3/8 + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, BottleneckCSP, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 [-1, 9, BottleneckCSP, [256]], - [-1, 1, Conv, [512, 3, 2]], # 6-P4/16 + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 [-1, 9, BottleneckCSP, [512]], - [-1, 1, Conv, [1024, 3, 2]], # 8-P5/32 + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 [-1, 1, SPP, [1024, [5, 9, 13]]], - [-1, 6, BottleneckCSP, [1024]], # 10 ] -# yolov5 head +# YOLOv5 head head: - [[-1, 3, BottleneckCSP, [1024, False]], # 11 - [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 12 (P5/32-large) + [[-1, 3, BottleneckCSP, [1024, False]], # 9 - [-2, 1, nn.Upsample, [None, 2, 'nearest']], - [[-1, 6], 1, Concat, [1]], # cat backbone P4 [-1, 1, Conv, [512, 1, 1]], - [-1, 3, BottleneckCSP, [512, False]], - [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 17 (P4/16-medium) + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, BottleneckCSP, [512, False]], # 13 - [-2, 1, nn.Upsample, [None, 2, 'nearest']], - [[-1, 4], 1, Concat, [1]], # cat backbone P3 [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 [-1, 3, BottleneckCSP, [256, False]], - [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 22 (P3/8-small) + [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 18 (P3/8-small) + + [-2, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, BottleneckCSP, [512, False]], + [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 22 (P4/16-medium) + + [-2, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, BottleneckCSP, [1024, False]], + [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 26 (P5/32-large) - [[], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + [[], 1, Detect, [nc, anchors]], # Detect(P5, P4, P3) ] diff --git a/models/yolov5x.yaml b/models/yolov5x.yaml index 47658e885153..dcd6fbc8676d 100644 --- a/models/yolov5x.yaml +++ b/models/yolov5x.yaml @@ -5,41 +5,48 @@ width_multiple: 1.25 # layer channel multiple # anchors anchors: - - [10,13, 16,30, 33,23] # P3/8 - - [30,61, 62,45, 59,119] # P4/16 - [116,90, 156,198, 373,326] # P5/32 + - [30,61, 62,45, 59,119] # P4/16 + - [10,13, 16,30, 33,23] # P3/8 -# yolov5 backbone +# YOLOv5 backbone backbone: # [from, number, module, args] - [[-1, 1, Focus, [64, 3]], # 1-P1/2 - [-1, 1, Conv, [128, 3, 2]], # 2-P2/4 - [-1, 3, Bottleneck, [128]], - [-1, 1, Conv, [256, 3, 2]], # 4-P3/8 + [[-1, 1, Focus, [64, 3]], # 0-P1/2 + [-1, 1, Conv, [128, 3, 2]], # 1-P2/4 + [-1, 3, BottleneckCSP, [128]], + [-1, 1, Conv, [256, 3, 2]], # 3-P3/8 [-1, 9, BottleneckCSP, [256]], - [-1, 1, Conv, [512, 3, 2]], # 6-P4/16 + [-1, 1, Conv, [512, 3, 2]], # 5-P4/16 [-1, 9, BottleneckCSP, [512]], - [-1, 1, Conv, [1024, 3, 2]], # 8-P5/32 + [-1, 1, Conv, [1024, 3, 2]], # 7-P5/32 [-1, 1, SPP, [1024, [5, 9, 13]]], - [-1, 6, BottleneckCSP, [1024]], # 10 ] -# yolov5 head +# YOLOv5 head head: - [[-1, 3, BottleneckCSP, [1024, False]], # 11 - [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 12 (P5/32-large) + [[-1, 3, BottleneckCSP, [1024, False]], # 9 - [-2, 1, nn.Upsample, [None, 2, 'nearest']], - [[-1, 6], 1, Concat, [1]], # cat backbone P4 [-1, 1, Conv, [512, 1, 1]], - [-1, 3, BottleneckCSP, [512, False]], - [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 17 (P4/16-medium) + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 6], 1, Concat, [1]], # cat backbone P4 + [-1, 3, BottleneckCSP, [512, False]], # 13 - [-2, 1, nn.Upsample, [None, 2, 'nearest']], - [[-1, 4], 1, Concat, [1]], # cat backbone P3 [-1, 1, Conv, [256, 1, 1]], + [-1, 1, nn.Upsample, [None, 2, 'nearest']], + [[-1, 4], 1, Concat, [1]], # cat backbone P3 [-1, 3, BottleneckCSP, [256, False]], - [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 22 (P3/8-small) + [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 18 (P3/8-small) + + [-2, 1, Conv, [256, 3, 2]], + [[-1, 14], 1, Concat, [1]], # cat head P4 + [-1, 3, BottleneckCSP, [512, False]], + [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 22 (P4/16-medium) + + [-2, 1, Conv, [512, 3, 2]], + [[-1, 10], 1, Concat, [1]], # cat head P5 + [-1, 3, BottleneckCSP, [1024, False]], + [-1, 1, nn.Conv2d, [na * (nc + 5), 1, 1]], # 26 (P5/32-large) - [[], 1, Detect, [nc, anchors]], # Detect(P3, P4, P5) + [[], 1, Detect, [nc, anchors]], # Detect(P5, P4, P3) ] diff --git a/utils/utils.py b/utils/utils.py index f1f5db504893..9dd1d65a0a3d 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -1094,12 +1094,14 @@ def plot_study_txt(f='study.txt', x=None): # from utils.utils import *; plot_st ax2.plot(1E3 / np.array([209, 140, 97, 58, 35, 18]), [33.5, 39.1, 42.5, 45.9, 49., 50.5], 'k.-', linewidth=2, markersize=8, alpha=.25, label='EfficientDet') + + ax2.grid() ax2.set_xlim(0, 30) - ax2.set_ylim(25, 50) - ax2.set_xlabel('GPU Latency (ms)') + ax2.set_ylim(28, 50) + ax2.set_yticks(np.arange(30, 55, 5)) + ax2.set_xlabel('GPU Speed (ms/img)') ax2.set_ylabel('COCO AP val') ax2.legend(loc='lower right') - ax2.grid() plt.savefig('study_mAP_latency.png', dpi=300) plt.savefig(f.replace('.txt', '.png'), dpi=200)