Simplified Inference #1045

Closed
wants to merge 39 commits into from
2be4a8c
initial commit
glenn-jocher Sep 25, 2020
c9ef269
batch inference update
glenn-jocher Sep 27, 2020
c599075
initial commit
glenn-jocher Sep 25, 2020
68c78a0
batch inference update
glenn-jocher Sep 27, 2020
c8141ba
Merge remote-tracking branch 'origin/simple_inference' into simple_in…
glenn-jocher Oct 10, 2020
623c568
add torch capability
glenn-jocher Oct 10, 2020
097aca2
empty image bug fix
glenn-jocher Oct 10, 2020
9896ce0
comment update
glenn-jocher Oct 10, 2020
11ea358
extract NMS to allow for augment
glenn-jocher Oct 10, 2020
82e865a
update NMS thresholds to CoreML defaults
glenn-jocher Oct 10, 2020
c2403d7
fuse() bug fix
glenn-jocher Oct 10, 2020
d87cf7e
Update requirements.txt coremltools==4.0
glenn-jocher Oct 11, 2020
d45e349
Rearrange export input after checks (#1118)
glenn-jocher Oct 11, 2020
10c85bf
FROM nvcr.io/nvidia/pytorch:20.09-py3
glenn-jocher Oct 11, 2020
0ada058
Generalized regression criterion renaming (#1120)
glenn-jocher Oct 11, 2020
00917a6
update expt name comment and folder parsing for training (#978)
Borda Oct 13, 2020
4346b13
Dataset download bash script updates (#1132)
glenn-jocher Oct 13, 2020
4d3680c
Minor import and spelling updates (#1133)
glenn-jocher Oct 13, 2020
c67e722
fix compatibility for hyper config (#1146)
Borda Oct 15, 2020
fe1d90a
fuse() bug fix
glenn-jocher Oct 10, 2020
70432a5
Update requirements.txt coremltools==4.0
glenn-jocher Oct 11, 2020
e63bf4d
Rearrange export input after checks (#1118)
glenn-jocher Oct 11, 2020
bfa2f89
FROM nvcr.io/nvidia/pytorch:20.09-py3
glenn-jocher Oct 11, 2020
402095a
Generalized regression criterion renaming (#1120)
glenn-jocher Oct 11, 2020
7363872
update expt name comment and folder parsing for training (#978)
Borda Oct 13, 2020
6088171
Dataset download bash script updates (#1132)
glenn-jocher Oct 13, 2020
330bdfb
Minor import and spelling updates (#1133)
glenn-jocher Oct 13, 2020
d7e6f4d
fix compatibility for hyper config (#1146)
Borda Oct 15, 2020
34282b0
update copied attributes
glenn-jocher Oct 15, 2020
8668c2b
optimize imports
glenn-jocher Oct 15, 2020
91a029a
initial commit
glenn-jocher Sep 25, 2020
a34b35b
batch inference update
glenn-jocher Sep 27, 2020
a9db87b
initial commit
glenn-jocher Sep 25, 2020
37a07a2
comment update
glenn-jocher Oct 10, 2020
5159d10
extract NMS to allow for augment
glenn-jocher Oct 10, 2020
0be772e
update NMS thresholds to CoreML defaults
glenn-jocher Oct 10, 2020
8144436
update copied attributes
glenn-jocher Oct 15, 2020
dc53110
optimize imports
glenn-jocher Oct 15, 2020
efa5e3f
Merge remote-tracking branch 'origin/simple_inference' into simple_in…
glenn-jocher Oct 15, 2020
2 changes: 1 addition & 1 deletion Dockerfile
@@ -1,5 +1,5 @@
# Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
-FROM nvcr.io/nvidia/pytorch:20.08-py3
+FROM nvcr.io/nvidia/pytorch:20.09-py3

# Install dependencies
RUN pip install --upgrade pip
2 changes: 1 addition & 1 deletion data/hyp.finetune.yaml
@@ -15,7 +15,7 @@ weight_decay: 0.00036
warmup_epochs: 2.0
warmup_momentum: 0.5
warmup_bias_lr: 0.05
-giou: 0.0296
+box: 0.0296
cls: 0.243
cls_pw: 0.631
obj: 0.301
2 changes: 1 addition & 1 deletion data/hyp.scratch.yaml
@@ -10,7 +10,7 @@ weight_decay: 0.0005 # optimizer weight decay 5e-4
warmup_epochs: 3.0 # warmup epochs (fractions ok)
warmup_momentum: 0.8 # warmup initial momentum
warmup_bias_lr: 0.1 # warmup initial bias lr
-giou: 0.05 # box loss gain
+box: 0.05 # box loss gain
cls: 0.5 # cls loss gain
cls_pw: 1.0 # cls BCELoss positive_weight
obj: 1.0 # obj loss gain (scale with pixels)
17 changes: 10 additions & 7 deletions data/scripts/get_coco.sh
@@ -8,14 +8,17 @@
# /yolov5

# Download/unzip labels
-echo 'Downloading COCO 2017 labels ...'
 d='../' # unzip directory
-f='coco2017labels.zip' && curl -L https://github.com/ultralytics/yolov5/releases/download/v1.0/$f -o $f
-unzip -q $f -d $d && rm $f
+url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
+f='coco2017labels.zip' # 68 MB
+echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove

 # Download/unzip images
-echo 'Downloading COCO 2017 images ...'
 d='../coco/images' # unzip directory
-f='train2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 19G, 118k images
-f='val2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 1G, 5k images
-# f='test2017.zip' && curl http://images.cocodataset.org/zips/$f -o $f && unzip -q $f -d $d && rm $f # 7G, 41k images
+url=http://images.cocodataset.org/zips/
+f1='train2017.zip' # 19G, 118k images
+f2='val2017.zip' # 1G, 5k images
+f3='test2017.zip' # 7G, 41k images (optional)
+for f in $f1 $f2; do
+  echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
+done
80 changes: 12 additions & 68 deletions data/scripts/get_voc.sh
@@ -8,79 +8,23 @@
# /yolov5

start=$(date +%s)

-# handle optional download dir
-if [ -z "$1" ]; then
-  # navigate to ~/tmp
-  echo "navigating to ../tmp/ ..."
-  mkdir -p ../tmp
-  cd ../tmp/
-else
-  # check if is valid directory
-  if [ ! -d $1 ]; then
-    echo $1 "is not a valid directory"
-    exit 0
-  fi
-  echo "navigating to" $1 "..."
-  cd $1
-fi
-
-echo "Downloading VOC2007 trainval ..."
-# Download data
-curl -LO http://pjreddie.com/media/files/VOCtrainval_06-Nov-2007.tar
-echo "Downloading VOC2007 test data ..."
-curl -LO http://pjreddie.com/media/files/VOCtest_06-Nov-2007.tar
-echo "Done downloading."
-
-# Extract data
-echo "Extracting trainval ..."
-tar -xf VOCtrainval_06-Nov-2007.tar
-echo "Extracting test ..."
-tar -xf VOCtest_06-Nov-2007.tar
-echo "removing tars ..."
-rm VOCtrainval_06-Nov-2007.tar
-rm VOCtest_06-Nov-2007.tar
-
-end=$(date +%s)
-runtime=$((end - start))
-
-echo "Completed in" $runtime "seconds"
-
-start=$(date +%s)
-
-# handle optional download dir
-if [ -z "$1" ]; then
-  # navigate to ~/tmp
-  echo "navigating to ../tmp/ ..."
-  mkdir -p ../tmp
-  cd ../tmp/
-else
-  # check if is valid directory
-  if [ ! -d $1 ]; then
-    echo $1 "is not a valid directory"
-    exit 0
-  fi
-  echo "navigating to" $1 "..."
-  cd $1
-fi
-
-echo "Downloading VOC2012 trainval ..."
-# Download data
-curl -LO http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
-echo "Done downloading."
-
-# Extract data
-echo "Extracting trainval ..."
-tar -xf VOCtrainval_11-May-2012.tar
-echo "removing tar ..."
-rm VOCtrainval_11-May-2012.tar
+mkdir -p ../tmp
+cd ../tmp/
+
+# Download/unzip images and labels
+d='.' # unzip directory
+url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
+f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
+f2=VOCtest_06-Nov-2007.zip # 438MB, 4953 images
+f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
+for f in $f1 $f2 $f3; do
+  echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
+done

end=$(date +%s)
runtime=$((end - start))

echo "Completed in" $runtime "seconds"

cd ../tmp
echo "Spliting dataset..."
python3 - "$@" <<END
import xml.etree.ElementTree as ET
5 changes: 2 additions & 3 deletions detect.py
@@ -1,6 +1,5 @@
 import argparse
 import os
-import platform
 import shutil
 import time
 from pathlib import Path
@@ -150,8 +149,8 @@ def detect(save_img=False):
parser.add_argument('--source', type=str, default='inference/images', help='source') # file/folder, 0 for webcam
parser.add_argument('--output', type=str, default='inference/output', help='output folder') # output folder
parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
-parser.add_argument('--conf-thres', type=float, default=0.4, help='object confidence threshold')
-parser.add_argument('--iou-thres', type=float, default=0.5, help='IOU threshold for NMS')
+parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
+parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
parser.add_argument('--view-img', action='store_true', help='display results')
parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
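The hunk above lowers detect.py's default confidence threshold from 0.4 to 0.25 and the IoU threshold from 0.5 to 0.45, matching the CoreML-aligned NMS defaults introduced elsewhere in this PR. A toy sketch (not code from the repository; `filter_candidates` is a hypothetical helper) of the first NMS stage, confidence filtering, shows why a lower conf-thres admits more candidate boxes into the IoU suppression stage:

```python
# Illustrative only: the first stage of NMS filters predictions by
# objectness confidence before any IoU-based suppression happens.
def filter_candidates(predictions, conf_thres=0.25):
    """predictions: list of (x1, y1, x2, y2, conf) tuples."""
    return [p for p in predictions if p[4] > conf_thres]

preds = [(0, 0, 10, 10, 0.9), (1, 1, 11, 11, 0.3), (5, 5, 15, 15, 0.2)]
print(len(filter_candidates(preds, conf_thres=0.4)))   # old default -> 1
print(len(filter_candidates(preds, conf_thres=0.25)))  # new default -> 2
```

Lower thresholds trade a little extra NMS work for better recall, which suits the new general-purpose `autoShape` inference path.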
5 changes: 1 addition & 4 deletions hubconf.py
@@ -10,7 +10,6 @@

import torch

-from models.common import NMS
from models.yolo import Model
from utils.google_utils import attempt_download

@@ -36,9 +35,7 @@ def create(name, pretrained, channels, classes):
state_dict = torch.load(ckpt, map_location=torch.device('cpu'))['model'].float().state_dict() # to FP32
state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].shape == v.shape} # filter
model.load_state_dict(state_dict, strict=False) # load

-model.add_nms() # add NMS module
-model.eval()
+# model = model.autoshape() # cv2/PIL/np/torch inference: predictions = model(Image.open('image.jpg'))
return model

except Exception as e:
64 changes: 59 additions & 5 deletions models/common.py
@@ -1,9 +1,12 @@
# This file contains modules common to various models
-import math
-
+import math
+import numpy as np
 import torch
 import torch.nn as nn
-from utils.general import non_max_suppression

+from utils.datasets import letterbox
+from utils.general import non_max_suppression, make_divisible, scale_coords


def autopad(k, p=None): # kernel, padding
@@ -101,17 +104,68 @@ def forward(self, x):

class NMS(nn.Module):
# Non-Maximum Suppression (NMS) module
-conf = 0.3 # confidence threshold
-iou = 0.6 # IoU threshold
+conf = 0.25 # confidence threshold
+iou = 0.45 # IoU threshold
classes = None # (optional list) filter by class

def __init__(self, dimension=1):
def __init__(self):
super(NMS, self).__init__()

def forward(self, x):
return non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)


+class autoShape(nn.Module):
+# input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
+img_size = 640 # inference size (pixels)
+conf = 0.25 # NMS confidence threshold
+iou = 0.45 # NMS IoU threshold
+classes = None # (optional list) filter by class

+def __init__(self, model):
+super(autoShape, self).__init__()
+self.model = model

+def forward(self, x, size=640, augment=False, profile=False):
+# supports inference from various sources. For height=720, width=1280, RGB images example inputs are:
+# opencv: x = cv2.imread('image.jpg')[:,:,::-1] # HWC BGR to RGB x(720,1280,3)
+# PIL: x = Image.open('image.jpg') # HWC x(720,1280,3)
+# numpy: x = np.zeros((720,1280,3)) # HWC
+# torch: x = torch.zeros(16,3,720,1280) # BCHW
+# multiple: x = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...] # list of images

+p = next(self.model.parameters()) # for device and type
+if isinstance(x, torch.Tensor): # torch
+return self.model(x.to(p.device).type_as(p), augment, profile) # inference

+# Pre-process
+if not isinstance(x, list):
+x = [x]
+shape0, shape1 = [], [] # image and inference shapes
+batch = range(len(x)) # batch size
+for i in batch:
+x[i] = np.array(x[i])[:, :, :3] # up to 3 channels if png
+s = x[i].shape[:2] # HWC
+shape0.append(s) # image shape
+g = (size / max(s)) # gain
+shape1.append([y * g for y in s])
+shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)] # inference shape
+x = [letterbox(x[i], new_shape=shape1, auto=False)[0] for i in batch] # pad
+x = np.stack(x, 0) if batch[-1] else x[0][None] # stack
+x = np.ascontiguousarray(x.transpose((0, 3, 1, 2))) # BHWC to BCHW
+x = torch.from_numpy(x).to(p.device).type_as(p) / 255. # uint8 to fp16/32

+# Inference
+x = self.model(x, augment, profile) # forward
+x = non_max_suppression(x[0], conf_thres=self.conf, iou_thres=self.iou, classes=self.classes) # NMS

+# Post-process
+for i in batch:
+if x[i] is not None:
+x[i][:, :4] = scale_coords(shape1, x[i][:, :4], shape0[i])
+return x


class Flatten(nn.Module):
# Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
@staticmethod
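The core of the new `autoShape.forward` pre-processing is a little shape arithmetic: scale the image so its longest side equals the inference size, then round each dimension up to a multiple of the model's maximum stride before letterboxing. A minimal standalone sketch of just that arithmetic (assuming `make_divisible` rounds up to a multiple of the divisor, mirroring `utils.general.make_divisible`; the actual letterbox padding is omitted):

```python
import math

def make_divisible(x, divisor):
    # round up to the nearest multiple of divisor, as in utils.general
    return math.ceil(x / divisor) * divisor

def inference_shape(hw, size=640, stride=32):
    """hw: (height, width) of the input image."""
    g = size / max(hw)                         # gain to fit the longest side
    scaled = [d * g for d in hw]               # scaled H, W
    return [make_divisible(d, stride) for d in scaled]  # stride multiples

print(inference_shape((720, 1280)))  # a 720x1280 frame -> [384, 640]
```

Rounding to stride multiples guarantees the feature maps at every detection scale have integer dimensions, so arbitrary input sizes "just work" without the caller resizing anything.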
6 changes: 3 additions & 3 deletions models/export.py
@@ -29,9 +29,6 @@
set_logging()
t = time.time()

-# Input
-img = torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size(1,3,320,192) iDetection

# Load PyTorch model
model = attempt_load(opt.weights, map_location=torch.device('cpu')) # load FP32 model
labels = model.names
@@ -40,6 +37,9 @@
gs = int(max(model.stride)) # grid size (max stride)
opt.img_size = [check_img_size(x, gs) for x in opt.img_size] # verify img_size are gs-multiples

+# Input
+img = torch.zeros(opt.batch_size, 3, *opt.img_size) # image size(1,3,320,192) iDetection

# Update model
for k, m in model.named_modules():
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatibility
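This export.py change (#1118) moves the dummy-input construction until after `check_img_size` has verified that each requested dimension is a grid-size multiple, so the exported graph is traced at a valid shape. A simplified mirror of that check (the repository's `utils.general.check_img_size` behaves similarly; this version is an illustrative reimplementation, not the original):

```python
import math

def check_img_size(img_size, gs=32):
    # round the requested size up to a multiple of the max stride (grid size)
    new_size = math.ceil(img_size / gs) * gs
    if new_size != img_size:
        print(f'WARNING: --img-size {img_size} must be multiple of {gs}, updating to {new_size}')
    return new_size

sizes = [check_img_size(x, gs=32) for x in (320, 200)]
print(sizes)  # [320, 224]
```

Building `img` before this check, as the old code did, could export a model at a size the network cannot actually run.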
33 changes: 21 additions & 12 deletions models/yolo.py
@@ -1,21 +1,22 @@
 import argparse
 import logging
+import math
 import sys
 from copy import deepcopy
 from pathlib import Path

-import math
-
 sys.path.append('./') # to run '$ python *.py' files in subdirectories
 logger = logging.getLogger(__name__)

 import torch
 import torch.nn as nn

-from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat, NMS
+from models.common import Conv, Bottleneck, SPP, DWConv, Focus, BottleneckCSP, Concat, NMS, autoShape
 from models.experimental import MixConv2d, CrossConv, C3
 from utils.general import check_anchor_order, make_divisible, check_file, set_logging
-from utils.torch_utils import (
-    time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, select_device)
+from utils.torch_utils import time_synchronized, fuse_conv_and_bn, model_info, scale_img, initialize_weights, \
+    select_device, copy_attr


class Detect(nn.Module):
@@ -140,6 +141,7 @@ def forward_once(self, x, profile=False):
return x

def _initialize_biases(self, cf=None): # initialize biases into Detect(), cf is class frequency
+# https://arxiv.org/abs/1708.02002 section 3.3
# cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
m = self.model[-1] # Detect() module
for mi, s in zip(m.m, m.stride): # from
@@ -162,23 +164,34 @@ def _print_biases(self):
def fuse(self): # fuse model Conv2d() + BatchNorm2d() layers
print('Fusing layers... ')
for m in self.model.modules():
-if type(m) is Conv and hasattr(Conv, 'bn'):
+if type(m) is Conv and hasattr(m, 'bn'):
m._non_persistent_buffers_set = set() # pytorch 1.6.0 compatability
m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv
delattr(m, 'bn') # remove batchnorm
m.forward = m.fuseforward # update forward
self.info()
return self

-def add_nms(self): # fuse model Conv2d() + BatchNorm2d() layers
-if type(self.model[-1]) is not NMS: # if missing NMS
-print('Adding NMS module... ')
+def nms(self, mode=True): # add or remove NMS module
+present = type(self.model[-1]) is NMS # last layer is NMS
+if mode and not present:
+print('Adding NMS... ')
 m = NMS() # module
 m.f = -1 # from
 m.i = self.model[-1].i + 1 # index
 self.model.add_module(name='%s' % m.i, module=m) # add
+self.eval()
+elif not mode and present:
+print('Removing NMS... ')
+self.model = self.model[:-1] # remove
 return self

+def autoshape(self): # add autoShape module
+print('Adding autoShape... ')
+m = autoShape(self) # wrap model
+copy_attr(m, self, include=('yaml', 'nc', 'hyp', 'names', 'stride'), exclude=()) # copy attributes
+return m

def info(self, verbose=False): # print model information
model_info(self, verbose)

@@ -263,10 +276,6 @@ def parse_model(d, ch): # model_dict, input_channels(3)
# img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
# y = model(img, profile=True)

-# ONNX export
-# model.model[-1].export = True
-# torch.onnx.export(model, img, opt.cfg.replace('.yaml', '.onnx'), verbose=True, opset_version=11)

# Tensorboard
# from torch.utils.tensorboard import SummaryWriter
# tb_writer = SummaryWriter()
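The old one-way `add_nms()` becomes a toggle: `nms(mode=True)` appends an NMS module as the last layer if absent, and `nms(mode=False)` drops it if present, so export and inference paths can share one model. A toy sketch of the same add-or-remove-terminal-module pattern, with a plain Python list standing in for the model's layer sequence (hypothetical names, not repository code):

```python
class NMS:
    """Placeholder standing in for the real NMS nn.Module."""
    pass

def toggle_nms(layers, mode=True):
    present = isinstance(layers[-1], NMS)  # is the last layer already NMS?
    if mode and not present:
        layers = layers + [NMS()]          # add NMS as the new last layer
    elif not mode and present:
        layers = layers[:-1]               # remove the terminal NMS
    return layers                          # unchanged if already in the requested state

layers = toggle_nms(['backbone', 'head'], mode=True)
print(isinstance(layers[-1], NMS))  # True
print(toggle_nms(layers, mode=False))  # ['backbone', 'head']
```

The idempotence (calling with the same mode twice is a no-op) is what makes the real method safe to call unconditionally before export or inference.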
2 changes: 1 addition & 1 deletion requirements.txt
@@ -18,7 +18,7 @@ tqdm>=4.41.0

# export --------------------------------------
# packaging # for coremltools
-# coremltools==4.0b4
+# coremltools==4.0
# onnx>=1.7.0
# scikit-learn==0.19.2 # for coreml quantization
