From cdb9bde181641917504717e162952826fca61a41 Mon Sep 17 00:00:00 2001 From: yxNONG <62932917+yxNONG@users.noreply.github.com> Date: Tue, 30 Jun 2020 19:06:28 +0800 Subject: [PATCH 01/16] Unify the check point of single and multi GPU save the model.hyp etc to checkpoint when use multi GPU training --- train.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/train.py b/train.py index 39dd0e555572..d933a5de074d 100644 --- a/train.py +++ b/train.py @@ -79,7 +79,7 @@ def train(hyp): # Create model model = Model(opt.cfg).to(device) assert model.md['nc'] == nc, '%s nc=%g classes but %s nc=%g classes' % (opt.data, nc, opt.cfg, model.md['nc']) - model.names = data_dict['names'] + # Image sizes gs = int(max(model.stride)) # grid size (max stride) @@ -172,6 +172,7 @@ def train(hyp): model.hyp = hyp # attach hyperparameters to model model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights + model.names = data_dict['names'] # Class frequency labels = np.concatenate(dataset.labels, 0) @@ -314,6 +315,14 @@ def train(hyp): # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: + if hasattr(model, 'module'): + # Duplicate Model parameters for Multi-GPU save + ema.ema.module.nc = model.nc # attach number of classes to model + ema.ema.module.hyp = model.hyp # attach hyperparameters to model + ema.ema.module.gr = model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) + ema.ema.module.class_weights = model.class_weights # attach class weights + ema.ema.module.names = data_dict['names'] + with open(results_file, 'r') as f: # create checkpoint ckpt = {'epoch': epoch, 'best_fitness': best_fitness, From 86784cfdbf123df3945ff5664a3ffe46a2304aa8 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Tue, 30 Jun 2020 21:43:53 -0700 Subject: [PATCH 02/16] --resume bug fix #252 --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index 27a877157302..fa672d1cd194 100644 --- a/train.py +++ b/train.py @@ -378,7 +378,7 @@ def train(hyp): parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') opt = parser.parse_args() - opt.weights = last if opt.resume else opt.weights + opt.weights = last if opt.resume and not opt.weights else opt.weights opt.cfg = check_file(opt.cfg) # check file opt.data = check_file(opt.data) # check file print(opt) From 1c86c2cb6600f4d019fcd3ba67bebceeaf982f5a Mon Sep 17 00:00:00 2001 From: edurenye Date: Wed, 1 Jul 2020 10:04:26 +0200 Subject: [PATCH 03/16] Add torchscript files to gitignore --- .gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.gitignore b/.gitignore index 5a95798f0f61..07993ab27f15 100755 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ gcp_test*.sh *.pt *.onnx *.mlmodel +*.torchscript darknet53.conv.74 yolov3-tiny.conv.15 From b5659d1195907472d7db5f23bab15d7d3b101891 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 1 Jul 2020 11:44:49 -0700 Subject: [PATCH 04/16] module updates --- models/common.py | 17 ++++++++++------- models/experimental.py | 34 ++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 7 deletions(-) diff --git a/models/common.py b/models/common.py index 3c4a0d729210..6a5972311f77 100644 --- a/models/common.py +++ b/models/common.py @@ -1,9 +1,13 @@ # This file contains modules common to various models - 
from utils.utils import * +def autopad(k): + # Pad to 'same' + return k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad + + def DWConv(c1, c2, k=1, s=1, act=True): # Depthwise convolution return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act) @@ -11,10 +15,9 @@ def DWConv(c1, c2, k=1, s=1, act=True): class Conv(nn.Module): # Standard convolution - def __init__(self, c1, c2, k=1, s=1, g=1, act=True): # ch_in, ch_out, kernel, stride, groups + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups super(Conv, self).__init__() - p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # padding - self.conv = nn.Conv2d(c1, c2, k, s, p, groups=g, bias=False) + self.conv = nn.Conv2d(c1, c2, k, s, p or autopad(k), groups=g, bias=False) self.bn = nn.BatchNorm2d(c2) self.act = nn.LeakyReLU(0.1, inplace=True) if act else nn.Identity() @@ -46,7 +49,7 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, nu self.cv1 = Conv(c1, c_, 1, 1) self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) - self.cv4 = Conv(c2, c2, 1, 1) + self.cv4 = Conv(2 * c_, c2, 1, 1) self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) self.act = nn.LeakyReLU(0.1, inplace=True) self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) @@ -79,9 +82,9 @@ def forward(self, x): class Focus(nn.Module): # Focus wh information into c-space - def __init__(self, c1, c2, k=1): + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups super(Focus, self).__init__() - self.conv = Conv(c1 * 4, c2, k, 1) + self.conv = Conv(c1 * 4, c2, k, s, p, g, act) def forward(self, x): # x(b,c,w,h) -> y(b,4c,w/2,h/2) return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1)) diff --git a/models/experimental.py b/models/experimental.py index 60cb7aa14cd5..cff9d141446d 100644 --- a/models/experimental.py +++ b/models/experimental.py @@ -1,6 +1,40 @@ +# This file contains experimental modules + from models.common import * +class CrossConv(nn.Module): + # Cross Convolution + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): # ch_in, ch_out, shortcut, groups, expansion + super(CrossConv, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, (1, 3), 1) + self.cv2 = Conv(c_, c2, (3, 1), 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + + +class C3(nn.Module): + # Cross Convolution CSP + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): # ch_in, ch_out, number, shortcut, groups, expansion + super(C3, self).__init__() + c_ = int(c2 * e) # hidden channels + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False) + self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False) + self.cv4 = Conv(2 * c_, c2, 1, 1) + self.bn = nn.BatchNorm2d(2 * c_) # applied to cat(cv2, cv3) + self.act = nn.LeakyReLU(0.1, inplace=True) + self.m = nn.Sequential(*[CrossConv(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + y1 = self.cv3(self.m(self.cv1(x))) + y2 = self.cv2(x) + return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1)))) + + class Sum(nn.Module): # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070 def __init__(self, n, weight=False): # n: number of inputs From f1d67f4110a29292b372aec2b94243ea82a9f7a2 Mon Sep 17 00:00:00 
2001 From: Glenn Jocher Date: Wed, 1 Jul 2020 15:46:15 -0700 Subject: [PATCH 05/16] update export.py --- models/export.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/models/export.py b/models/export.py index 2aa6ce403ac6..1c78d3a8b19a 100644 --- a/models/export.py +++ b/models/export.py @@ -1,4 +1,4 @@ -"""Exports a YOLOv5 *.pt model to *.onnx and *.torchscript formats +"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats Usage: $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1 @@ -30,20 +30,20 @@ model.model[-1].export = True # set Detect() layer export=True _ = model(img) # dry run - # Export to torchscript + # Export to TorchScript try: f = opt.weights.replace('.pt', '.torchscript') # filename ts = torch.jit.trace(model, img) ts.save(f) - print('Torchscript export success, saved as %s' % f) - except: - print('Torchscript export failed.') + print('TorchScript export success, saved as %s' % f) + except Exception as e: + print('TorchScript export failed: %s' % e) # Export to ONNX try: f = opt.weights.replace('.pt', '.onnx') # filename model.fuse() # only for ONNX - torch.onnx.export(model, img, f, verbose=False, opset_version=11, input_names=['images'], + torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], output_names=['output']) # output_names=['classes', 'boxes'] # Checks @@ -51,5 +51,5 @@ onnx.checker.check_model(onnx_model) # check onnx model print(onnx.helper.printable_graph(onnx_model.graph)) # print a human readable representation of the graph print('ONNX export success, saved as %s\nView with https://github.com/lutzroeder/netron' % f) - except: - print('ONNX export failed.') + except Exception as e: + print('ONNX export failed: %s' % e) From a62a1c2c679cc6de730debf1529f073d10180452 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 1 Jul 2020 16:14:49 -0700 Subject: [PATCH 06/16] export.py update --- detect.py | 8 ++++---- models/export.py | 10 +++++----- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/detect.py b/detect.py index bb84a0df0c2c..93faf6da4553 100644 --- a/detect.py +++ b/detect.py @@ -156,9 +156,9 @@ def detect(save_img=False): print(opt) with torch.no_grad(): - detect() + # detect() # Update all models - # for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']: - # detect() - # create_pretrained(opt.weights, opt.weights) + for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']: + detect() + create_pretrained(opt.weights, opt.weights) diff --git a/models/export.py b/models/export.py index 1c78d3a8b19a..bb310f3f89a0 100644 --- a/models/export.py +++ b/models/export.py @@ -6,8 +6,6 @@ import argparse -import onnx - from models.common import * from utils import google_utils @@ -21,7 +19,7 @@ print(opt) # Input - img = torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size, (1, 3, 320, 192) iDetection + img = torch.zeros((opt.batch_size, 3, *opt.img_size)) # image size(1,3,320,192) iDetection # Load PyTorch model google_utils.attempt_download(opt.weights) @@ -30,7 +28,7 @@ model.model[-1].export = True # set Detect() layer export=True _ = model(img) # dry run - # Export to TorchScript + # TorchScript export try: f = opt.weights.replace('.pt', '.torchscript') # filename ts = torch.jit.trace(model, img) @@ -39,8 +37,10 @@ except Exception as e: print('TorchScript export failed: %s' % e) - # Export to ONNX + # ONNX export try: + 
import onnx + f = opt.weights.replace('.pt', '.onnx') # filename model.fuse() # only for ONNX torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'], From 5323ad224d90680da9cdb7fd8b82089750b7252e Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 1 Jul 2020 16:15:25 -0700 Subject: [PATCH 07/16] export.py update --- detect.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/detect.py b/detect.py index 93faf6da4553..2650c202d49d 100644 --- a/detect.py +++ b/detect.py @@ -156,9 +156,9 @@ def detect(save_img=False): print(opt) with torch.no_grad(): - # detect() + detect() - # Update all models - for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']: - detect() - create_pretrained(opt.weights, opt.weights) + # # Update all models + # for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt', 'yolov3-spp.pt']: + # detect() + # create_pretrained(opt.weights, opt.weights) From 1fca7a7f2461f6e178833ebc7d938fea86a6bf84 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Wed, 1 Jul 2020 19:15:59 -0700 Subject: [PATCH 08/16] autopad() update in common.py --- models/common.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/models/common.py b/models/common.py index 6a5972311f77..2c2d600394c1 100644 --- a/models/common.py +++ b/models/common.py @@ -3,9 +3,11 @@ from utils.utils import * -def autopad(k): +def autopad(k, p=None): # kernel, padding # Pad to 'same' - return k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad + if p is None: + p = k // 2 if isinstance(k, int) else [x // 2 for x in k] # auto-pad + return p def DWConv(c1, c2, k=1, s=1, act=True): @@ -17,7 +19,7 @@ class Conv(nn.Module): # Standard convolution def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): # ch_in, ch_out, kernel, stride, padding, groups super(Conv, self).__init__() - self.conv = nn.Conv2d(c1, c2, k, s, p or autopad(k), groups=g, bias=False) + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) self.bn = nn.BatchNorm2d(c2) self.act = nn.LeakyReLU(0.1, inplace=True) if act else nn.Identity() From 53cdaf6bf5fcdf28b140d3898d59876ceee5fac0 Mon Sep 17 00:00:00 2001 From: yxNONG <62932917+yxNONG@users.noreply.github.com> Date: Thu, 2 Jul 2020 13:48:19 +0800 Subject: [PATCH 09/16] Update utils.py --- utils/utils.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/utils/utils.py b/utils/utils.py index c33f41f71410..220599935360 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -421,7 +421,9 @@ def compute_loss(p, targets, model): # predictions, targets, model ft = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor lcls, lbox, lobj = ft([0]), ft([0]), ft([0]) tcls, tbox, indices, anchors = build_targets(p, targets, model) # targets - h = model.hyp # hyperparameters + h = model.module.hyp if hasattr(model, 'module') else model.hyp # hyperparameters + nc = model.module.nc if hasattr(model, 'module') else model.nc + gr = model.module.gr if hasattr(model, 'module') else model.gr red = 'mean' # Loss reduction (sum or mean) # Define criteria @@ -455,10 +457,10 @@ def compute_loss(p, targets, model): # predictions, targets, model lbox += (1.0 - giou).sum() if red == 'sum' else (1.0 - giou).mean() # giou loss # Obj - tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * giou.detach().clamp(0).type(tobj.dtype) # giou ratio + tobj[b, a, gj, gi] = (1.0 - gr) + gr * giou.detach().clamp(0).type(tobj.dtype) # giou ratio # 
Class - if model.nc > 1: # cls loss (only if multiple classes) + if nc > 1: # cls loss (only if multiple classes) t = torch.full_like(ps[:, 5:], cn) # targets t[range(nb), tcls[i]] = cp lcls += BCEcls(ps[:, 5:], t) # BCE @@ -477,7 +479,7 @@ def compute_loss(p, targets, model): # predictions, targets, model g = 3.0 # loss gain lobj *= g / bs if nt: - lcls *= g / nt / model.nc + lcls *= g / nt / nc lbox *= g / nt loss = lbox + lobj + lcls @@ -488,6 +490,8 @@ def build_targets(p, targets, model): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) det = model.module.model[-1] if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) \ else model.model[-1] # Detect() module + hyp = model.module.hyp if hasattr(model, 'module') else model.hyp + na, nt = det.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch = [], [], [], [] gain = torch.ones(6, device=targets.device) # normalized to gridspace gain @@ -503,7 +507,7 @@ def build_targets(p, targets, model): a, t, offsets = [], targets * gain, 0 if nt: r = t[None, :, 4:6] / anchors[:, None] # wh ratio - j = torch.max(r, 1. / r).max(2)[0] < model.hyp['anchor_t'] # compare + j = torch.max(r, 1. / r).max(2)[0] < hyp['anchor_t'] # compare # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2)) a, t = at[j], t.repeat(na, 1, 1)[j] # filter From 1aa2b679333657cc20a702dabb1b5de3315cf577 Mon Sep 17 00:00:00 2001 From: yxNONG <62932917+yxNONG@users.noreply.github.com> Date: Thu, 2 Jul 2020 13:51:52 +0800 Subject: [PATCH 10/16] Update train.py --- train.py | 28 ++++++++++------------------ 1 file changed, 10 insertions(+), 18 deletions(-) diff --git a/train.py b/train.py index d933a5de074d..3b7c9a575678 100644 --- a/train.py +++ b/train.py @@ -147,15 +147,6 @@ def train(hyp): # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822 # plot_lr_scheduler(optimizer, scheduler, epochs) - # Initialize distributed training - if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available(): - dist.init_process_group(backend='nccl', # distributed backend - init_method='tcp://127.0.0.1:9999', # init method - world_size=1, # number of nodes - rank=0) # node rank - model = torch.nn.parallel.DistributedDataParallel(model) - # pip install torch==1.4.0+cu100 torchvision==0.5.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html - # Trainloader dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect) @@ -173,6 +164,15 @@ def train(hyp): model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights model.names = data_dict['names'] + + # Initialize distributed training + if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available(): + dist.init_process_group(backend='nccl', # distributed backend + init_method='tcp://127.0.0.1:9999', # init method + world_size=1, # number of nodes + rank=0) # node rank + model = torch.nn.parallel.DistributedDataParallel(model) + # pip install torch==1.4.0+cu100 torchvision==0.5.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html # Class frequency labels = np.concatenate(dataset.labels, 0) @@ -289,7 +289,7 @@ def train(hyp): batch_size=batch_size, imgsz=imgsz_test, save_json=final_epoch and opt.data.endswith(os.sep + 'coco.yaml'), - model=ema.ema, + 
model=ema.ema.module if hasattr(model, 'module') else ema.ema, single_cls=opt.single_cls, dataloader=testloader) @@ -315,14 +315,6 @@ def train(hyp): # Save model save = (not opt.nosave) or (final_epoch and not opt.evolve) if save: - if hasattr(model, 'module'): - # Duplicate Model parameters for Multi-GPU save - ema.ema.module.nc = model.nc # attach number of classes to model - ema.ema.module.hyp = model.hyp # attach hyperparameters to model - ema.ema.module.gr = model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) - ema.ema.module.class_weights = model.class_weights # attach class weights - ema.ema.module.names = data_dict['names'] - with open(results_file, 'r') as f: # create checkpoint ckpt = {'epoch': epoch, 'best_fitness': best_fitness, From 13f69777a69c4a6056c7cb8499c7e3910868122d Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 2 Jul 2020 09:26:03 -0700 Subject: [PATCH 11/16] typo fix --- train.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/train.py b/train.py index fa672d1cd194..0cc3f31003ae 100644 --- a/train.py +++ b/train.py @@ -119,7 +119,7 @@ def train(hyp): model.load_state_dict(ckpt['model'], strict=False) except KeyError as e: s = "%s is not compatible with %s. This may be due to model differences or %s may be out of date. " \ - "Please delete or update %s and try again, or use --weights '' to train from scatch." \ + "Please delete or update %s and try again, or use --weights '' to train from scratch." \ % (opt.weights, opt.cfg, opt.weights, opt.weights) raise KeyError(s) from e From 597ed4ce630071cd5809c65c1322f5b961ba8c9c Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 2 Jul 2020 12:00:55 -0700 Subject: [PATCH 12/16] Update train.py --- train.py | 29 +++++++++++++++++------------ 1 file changed, 17 insertions(+), 12 deletions(-) diff --git a/train.py b/train.py index 3b7c9a575678..dfc9ecdbdbc5 100644 --- a/train.py +++ b/train.py @@ -79,7 +79,6 @@ def train(hyp): # Create model model = Model(opt.cfg).to(device) assert model.md['nc'] == nc, '%s nc=%g classes but %s nc=%g classes' % (opt.data, nc, opt.cfg, model.md['nc']) - # Image sizes gs = int(max(model.stride)) # grid size (max stride) @@ -133,7 +132,13 @@ def train(hyp): with open(results_file, 'w') as file: file.write(ckpt['training_results']) # write results.txt + # epochs start_epoch = ckpt['epoch'] + 1 + if epochs < start_epoch: + print('%s has been trained for %g epochs. Fine-tuning for %g additional epochs.' 
% + (opt.weights, ckpt['epoch'], epochs)) + epochs += ckpt['epoch'] # finetune additional epochs + del ckpt # Mixed precision training https://github.com/NVIDIA/apex @@ -147,6 +152,15 @@ def train(hyp): # https://discuss.pytorch.org/t/a-problem-occured-when-resuming-an-optimizer/28822 # plot_lr_scheduler(optimizer, scheduler, epochs) + # Initialize distributed training + if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available(): + dist.init_process_group(backend='nccl', # distributed backend + init_method='tcp://127.0.0.1:9999', # init method + world_size=1, # number of nodes + rank=0) # node rank + model = torch.nn.parallel.DistributedDataParallel(model) + # pip install torch==1.4.0+cu100 torchvision==0.5.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html + # Trainloader dataloader, dataset = create_dataloader(train_path, imgsz, batch_size, gs, opt, hyp=hyp, augment=True, cache=opt.cache_images, rect=opt.rect) @@ -155,7 +169,7 @@ def train(hyp): # Testloader testloader = create_dataloader(test_path, imgsz_test, batch_size, gs, opt, - hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0] + hyp=hyp, augment=False, cache=opt.cache_images, rect=True)[0] # Model parameters hyp['cls'] *= nc / 80. # scale coco-tuned hyp['cls'] to current dataset @@ -164,15 +178,6 @@ def train(hyp): model.gr = 1.0 # giou loss ratio (obj_loss = 1.0 or giou) model.class_weights = labels_to_class_weights(dataset.labels, nc).to(device) # attach class weights model.names = data_dict['names'] - - # Initialize distributed training - if device.type != 'cpu' and torch.cuda.device_count() > 1 and torch.distributed.is_available(): - dist.init_process_group(backend='nccl', # distributed backend - init_method='tcp://127.0.0.1:9999', # init method - world_size=1, # number of nodes - rank=0) # node rank - model = torch.nn.parallel.DistributedDataParallel(model) - # pip install torch==1.4.0+cu100 torchvision==0.5.0+cu100 -f https://download.pytorch.org/whl/torch_stable.html # Class frequency labels = np.concatenate(dataset.labels, 0) @@ -373,7 +378,7 @@ def train(hyp): parser.add_argument('--multi-scale', action='store_true', help='vary img-size +/- 50%') parser.add_argument('--single-cls', action='store_true', help='train as single-class dataset') opt = parser.parse_args() - opt.weights = last if opt.resume else opt.weights + opt.weights = last if opt.resume and not opt.weights else opt.weights opt.cfg = check_file(opt.cfg) # check file opt.data = check_file(opt.data) # check file print(opt) From fc7c42723d8008438b217072dfa088612ac76225 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 2 Jul 2020 12:01:43 -0700 Subject: [PATCH 13/16] Update utils.py --- utils/utils.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/utils/utils.py b/utils/utils.py index 220599935360..305486a5f6a3 100755 --- a/utils/utils.py +++ b/utils/utils.py @@ -47,7 +47,7 @@ def check_git_status(): def check_img_size(img_size, s=32): # Verify img_size is a multiple of stride s - new_size = make_divisible(img_size, s) # ceil gs-multiple + new_size = make_divisible(img_size, int(s)) # ceil gs-multiple if new_size != img_size: print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size)) return new_size @@ -421,9 +421,7 @@ def compute_loss(p, targets, model): # predictions, targets, model ft = torch.cuda.FloatTensor if p[0].is_cuda else torch.Tensor lcls, lbox, lobj = ft([0]), ft([0]), ft([0]) tcls, tbox, indices, 
anchors = build_targets(p, targets, model) # targets - h = model.module.hyp if hasattr(model, 'module') else model.hyp # hyperparameters - nc = model.module.nc if hasattr(model, 'module') else model.nc - gr = model.module.gr if hasattr(model, 'module') else model.gr + h = model.hyp # hyperparameters red = 'mean' # Loss reduction (sum or mean) # Define criteria @@ -457,10 +455,10 @@ def compute_loss(p, targets, model): # predictions, targets, model lbox += (1.0 - giou).sum() if red == 'sum' else (1.0 - giou).mean() # giou loss # Obj - tobj[b, a, gj, gi] = (1.0 - gr) + gr * giou.detach().clamp(0).type(tobj.dtype) # giou ratio + tobj[b, a, gj, gi] = (1.0 - model.gr) + model.gr * giou.detach().clamp(0).type(tobj.dtype) # giou ratio # Class - if nc > 1: # cls loss (only if multiple classes) + if model.nc > 1: # cls loss (only if multiple classes) t = torch.full_like(ps[:, 5:], cn) # targets t[range(nb), tcls[i]] = cp lcls += BCEcls(ps[:, 5:], t) # BCE @@ -479,7 +477,7 @@ def compute_loss(p, targets, model): # predictions, targets, model g = 3.0 # loss gain lobj *= g / bs if nt: - lcls *= g / nt / nc + lcls *= g / nt / model.nc lbox *= g / nt loss = lbox + lobj + lcls @@ -490,8 +488,6 @@ def build_targets(p, targets, model): # Build targets for compute_loss(), input targets(image,class,x,y,w,h) det = model.module.model[-1] if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) \ else model.model[-1] # Detect() module - hyp = model.module.hyp if hasattr(model, 'module') else model.hyp - na, nt = det.na, targets.shape[0] # number of anchors, targets tcls, tbox, indices, anch = [], [], [], [] gain = torch.ones(6, device=targets.device) # normalized to gridspace gain @@ -507,7 +503,7 @@ def build_targets(p, targets, model): a, t, offsets = [], targets * gain, 0 if nt: r = t[None, :, 4:6] / anchors[:, None] # wh ratio - j = torch.max(r, 1. / r).max(2)[0] < hyp['anchor_t'] # compare + j = torch.max(r, 1. 
/ r).max(2)[0] < model.hyp['anchor_t'] # compare # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t'] # iou(3,n) = wh_iou(anchors(3,2), gwh(n,2)) a, t = at[j], t.repeat(na, 1, 1)[j] # filter From f02481c73a4f8e3dbc0ae809b50310c0b2d700c9 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 2 Jul 2020 12:03:45 -0700 Subject: [PATCH 14/16] Update torch_utils.py --- utils/torch_utils.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/utils/torch_utils.py b/utils/torch_utils.py index e069792e6e3f..a62adc9cf122 100644 --- a/utils/torch_utils.py +++ b/utils/torch_utils.py @@ -54,6 +54,11 @@ def time_synchronized(): return time.time() +def is_parallel(model): + # is model is parallel with DP or DDP + return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel) + + def initialize_weights(model): for m in model.modules(): t = type(m) @@ -111,8 +116,8 @@ def model_info(model, verbose=False): try: # FLOPS from thop import profile - macs, _ = profile(model, inputs=(torch.zeros(1, 3, 480, 640),), verbose=False) - fs = ', %.1f GFLOPS' % (macs / 1E9 * 2) + flops = profile(deepcopy(model), inputs=(torch.zeros(1, 3, 64, 64),), verbose=False)[0] / 1E9 * 2 + fs = ', %.1f GFLOPS' % (flops * 100) # 640x640 FLOPS except: fs = '' @@ -185,7 +190,7 @@ def update(self, model): self.updates += 1 d = self.decay(self.updates) with torch.no_grad(): - if type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel): + if is_parallel(model): msd, esd = model.module.state_dict(), self.ema.module.state_dict() else: msd, esd = model.state_dict(), self.ema.state_dict() @@ -196,7 +201,8 @@ def update(self, model): v += (1. - d) * msd[k].detach() def update_attr(self, model): - # Assign attributes (which may change during training) - for k in model.__dict__.keys(): - if not k.startswith('_'): - setattr(self.ema, k, getattr(model, k)) + # Update class attributes + ema = self.ema.module if is_parallel(model) else self.ema + for k, v in model.__dict__.items(): + if not k.startswith('_') and k != 'module': + setattr(ema, k, v) From 6ca3f35cd4e6834adc116e2d1ebe2defa082c7e8 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 2 Jul 2020 16:41:03 -0700 Subject: [PATCH 15/16] update .dockerignore --- .dockerignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.dockerignore b/.dockerignore index a68626df5f2e..42f241f28c7b 100644 --- a/.dockerignore +++ b/.dockerignore @@ -14,8 +14,10 @@ data/samples/* # Neural Network weights ----------------------------------------------------------------------------------------------- **/*.weights **/*.pt +**/*.pth **/*.onnx **/*.mlmodel +**/*.torchscript # Below Copied From .gitignore ----------------------------------------------------------------------------------------- From 3bdea3f697d4fce36c8e24a0701c0f419fa8f63a Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Thu, 2 Jul 2020 21:24:26 -0700 Subject: [PATCH 16/16] strip_optimizer() bug fix #253 --- train.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/train.py b/train.py index aabf4f1f62df..08a0fe5db6be 100644 --- a/train.py +++ b/train.py @@ -336,17 +336,17 @@ def train(hyp): # end epoch ---------------------------------------------------------------------------------------------------- # end training - n = opt.name - if len(n): - n = '_' + n if not n.isnumeric() else n - fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n - for f1, f2 in zip([wdir + 'last.pt', wdir 
+ 'best.pt', 'results.txt'], [flast, fbest, fresults]): - if os.path.exists(f1): - os.rename(f1, f2) # rename - ispt = f2.endswith('.pt') # is *.pt - strip_optimizer(f2) if ispt else None # strip optimizer - os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None # upload - + # Strip optimizers + n = ('_' if len(opt.name) and not opt.name.isnumeric() else '') + opt.name + fresults, flast, fbest = 'results%s.txt' % n, wdir + 'last%s.pt' % n, wdir + 'best%s.pt' % n + for f1, f2 in zip([wdir + 'last.pt', wdir + 'best.pt', 'results.txt'], [flast, fbest, fresults]): + if os.path.exists(f1): + os.rename(f1, f2) # rename + ispt = f2.endswith('.pt') # is *.pt + strip_optimizer(f2) if ispt else None # strip optimizer + os.system('gsutil cp %s gs://%s/weights' % (f2, opt.bucket)) if opt.bucket and ispt else None # upload + + # Finish if not opt.evolve: plot_results() # save as results.png print('%g epochs completed in %.3f hours.\n' % (epoch - start_epoch + 1, (time.time() - t0) / 3600))
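
Note: patches 04 and 08 introduce and then generalize the autopad() helper so that Conv and Focus accept an optional explicit padding. The snippet below reproduces the final helper from patch 08 and adds a few illustrative calls (the example values are not from the patches) showing the 'same'-padding behaviour for square kernels, for the asymmetric (1, 3)/(3, 1) kernels used by CrossConv, and for an explicit override.

def autopad(k, p=None):  # kernel, padding
    # Pad to 'same'
    if p is None:
        p = k // 2 if isinstance(k, int) else [x // 2 for x in k]  # auto-pad
    return p

assert autopad(3) == 1            # 3x3 kernel -> pad 1
assert autopad(5) == 2            # 5x5 kernel -> pad 2
assert autopad((1, 3)) == [0, 1]  # asymmetric kernel -> per-dimension padding
assert autopad(3, 0) == 0         # explicit padding overrides the default

A side effect of the patch-08 form is that an explicit p=0 now takes effect, whereas the earlier `p or autopad(k)` expression from patch 04 would have treated 0 as falsy and auto-padded anyway.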
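
Note: several of these patches (01, 09, 10, 13 and 14) deal with the same DataParallel/DistributedDataParallel wrinkle: the wrapper and the wrapped module are distinct objects, so attributes such as nc, hyp, gr and names attached to the underlying model are only reachable through model.module once the model is wrapped. Patch 14 factors that check into an is_parallel() helper. Below is a minimal standalone sketch of the convention, not code taken from the patches; TinyModel and the attribute values are placeholders.

import torch
import torch.nn as nn


def is_parallel(model):
    # True when the model is wrapped in DataParallel or DistributedDataParallel
    # (same check as the helper added to utils/torch_utils.py in patch 14)
    return type(model) in (nn.parallel.DataParallel, nn.parallel.DistributedDataParallel)


class TinyModel(nn.Module):
    # Stand-in for models.yolo.Model; illustrative only
    def __init__(self):
        super().__init__()
        self.conv = nn.Conv2d(3, 8, 1)

    def forward(self, x):
        return self.conv(x)


model = TinyModel()
if torch.cuda.is_available() and torch.cuda.device_count() > 1:
    model = nn.DataParallel(model.cuda())  # train.py uses DistributedDataParallel; DP shown for brevity

# Attach run-time attributes to the underlying module, once
m = model.module if is_parallel(model) else model
m.nc = 80                    # number of classes -- illustrative value
m.hyp = {'anchor_t': 4.0}    # hyperparameters -- illustrative subset
m.gr = 1.0                   # giou loss ratio (obj_loss = 1.0 or giou)
m.names = ['person', 'car']  # class names -- illustrative

# Consumers unwrap the same way instead of branching on hasattr(model, 'module') at every call site
print((model.module if is_parallel(model) else model).nc)  # -> 80

Patch 09 had pushed hasattr(model, 'module') checks down into compute_loss(); patch 13 removes them again, and the unwrapping that remains lives at the train.py call sites and in ModelEMA.update_attr() from patch 14.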