From b90e8bfcd0521274c2bb45b5a288e30d626512b3 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:16:26 +0100
Subject: [PATCH 01/63] New `DetectMultiBackend()` class

---
 detect.py            | 124 ++++++-----------------------
 models/common.py     |  94 +++++++++++++++++++++++++++-
 utils/general.py     |   3 +-
 utils/torch_utils.py |  20 -------
 4 files changed, 118 insertions(+), 123 deletions(-)

diff --git a/detect.py b/detect.py
index 661a0b86bc99..7080f83497fe 100644
--- a/detect.py
+++ b/detect.py
@@ -14,12 +14,10 @@
 import argparse
 import os
-import platform
 import sys
 from pathlib import Path
 
 import cv2
-import numpy as np
 import torch
 import torch.backends.cudnn as cudnn
 
@@ -29,13 +27,12 @@
 sys.path.append(str(ROOT))  # add ROOT to PATH
 ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
 
-from models.experimental import attempt_load
+from models.common import DetectMultiBackend
 from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
-from utils.general import (LOGGER, apply_classifier, check_file, check_img_size, check_imshow, check_requirements,
-                           check_suffix, colorstr, increment_path, non_max_suppression, print_args, scale_coords,
-                           strip_optimizer, xyxy2xywh)
+from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr,
+                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
 from utils.plots import Annotator, colors, save_one_box
-from utils.torch_utils import load_classifier, select_device, time_sync
+from utils.torch_utils import select_device, time_sync
 
 
 @torch.no_grad()
@@ -82,55 +79,9 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     half &= device.type != 'cpu'  # half precision only supported on CUDA
 
     # Load model
-    w = str(weights[0] if isinstance(weights, list) else weights)
-    classify, suffix, suffixes = False, Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
-    check_suffix(w, suffixes)  # check weights have acceptable suffix
-    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
-    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
-    if pt:
-        model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
-        stride = int(model.stride.max())  # model stride
-        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-        if half:
-            model.half()  # to FP16
-        if classify:  # second-stage classifier
-            modelc = load_classifier(name='resnet50', n=2)  # initialize
-            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()
-    elif onnx:
-        if dnn:
-            check_requirements(('opencv-python>=4.5.4',))
-            net = cv2.dnn.readNetFromONNX(w)
-        else:
-            check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
-            import onnxruntime
-            session = onnxruntime.InferenceSession(w, None)
-    else:  # TensorFlow models
-        import tensorflow as tf
-        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
-            def wrap_frozen_graph(gd, inputs, outputs):
-                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped import
-                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
-                               tf.nest.map_structure(x.graph.as_graph_element, outputs))
-
-            graph_def = tf.Graph().as_graph_def()
-            graph_def.ParseFromString(open(w, 'rb').read())
-            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
-        elif saved_model:
-            model = tf.keras.models.load_model(w)
-        elif tflite:
-            if "edgetpu" in w:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
-                import tflite_runtime.interpreter as tflri
-                delegate = {'Linux': 'libedgetpu.so.1',  # install libedgetpu https://coral.ai/software/#edgetpu-runtime
-                            'Darwin': 'libedgetpu.1.dylib',
-                            'Windows': 'edgetpu.dll'}[platform.system()]
-                interpreter = tflri.Interpreter(model_path=w, experimental_delegates=[tflri.load_delegate(delegate)])
-            else:
-                interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
-            interpreter.allocate_tensors()  # allocate
-            input_details = interpreter.get_input_details()  # inputs
-            output_details = interpreter.get_output_details()  # outputs
-            int8 = input_details[0]['dtype'] == np.uint8  # is TFLite quantized uint8 model
-    imgsz = check_img_size(imgsz, s=stride)  # check image size
+    model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn)
+    stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
+    imgsz = check_img_size(imgsz, s=model.stride)  # check image size
 
     # Dataloader
     if webcam:
@@ -145,52 +96,24 @@ def wrap_frozen_graph(gd, inputs, outputs):
 
     # Run inference
     if pt and device.type != 'cpu':
-        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
+        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # run once
     dt, seen = [0.0, 0.0, 0.0], 0
-    for path, img, im0s, vid_cap, s in dataset:
+    for path, im, im0s, vid_cap, s in dataset:
         t1 = time_sync()
         if onnx:
-            img = img.astype('float32')
+            im = im.astype('float32')
         else:
-            img = torch.from_numpy(img).to(device)
-            img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255  # 0 - 255 to 0.0 - 1.0
-        if len(img.shape) == 3:
-            img = img[None]  # expand for batch dim
+            im = torch.from_numpy(im).to(device)
+            im = im.half() if half else im.float()  # uint8 to fp16/32
+        im /= 255  # 0 - 255 to 0.0 - 1.0
+        if len(im.shape) == 3:
+            im = im[None]  # expand for batch dim
         t2 = time_sync()
         dt[0] += t2 - t1
 
         # Inference
-        if pt:
-            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
-            pred = model(img, augment=augment, visualize=visualize)[0]
-        elif onnx:
-            if dnn:
-                net.setInput(img)
-                pred = torch.tensor(net.forward())
-            else:
-                pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
-        else:  # tensorflow model (tflite, pb, saved_model)
-            imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
-            if pb:
-                pred = frozen_func(x=tf.constant(imn)).numpy()
-            elif saved_model:
-                pred = model(imn, training=False).numpy()
-            elif tflite:
-                if int8:
-                    scale, zero_point = input_details[0]['quantization']
-                    imn = (imn / scale + zero_point).astype(np.uint8)  # de-scale
-                interpreter.set_tensor(input_details[0]['index'], imn)
-                interpreter.invoke()
-                pred = interpreter.get_tensor(output_details[0]['index'])
-                if int8:
-                    scale, zero_point = output_details[0]['quantization']
-                    pred = (pred.astype(np.float32) - zero_point) * scale  # re-scale
-            pred[..., 0] *= imgsz[1]  # x
-            pred[..., 1] *= imgsz[0]  # y
-            pred[..., 2] *= imgsz[1]  # w
-            pred[..., 3] *= imgsz[0]  # h
-            pred = torch.tensor(pred)
+        visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
+        pred = model(im, augment=augment, visualize=visualize)
         t3 = time_sync()
         dt[1] += t3 - t2
 
@@ -199,8 +122,7 @@ def wrap_frozen_graph(gd, inputs, outputs):
         dt[2] += time_sync() - t3
 
         # Second-stage classifier (optional)
-        if classify:
-            pred = apply_classifier(pred, modelc, img, im0s)
+        # pred = apply_classifier(pred, classifier_model, im, im0s)
 
         # Process predictions
         for i, det in enumerate(pred):  # per image
@@ -212,15 +134,15 @@ def wrap_frozen_graph(gd, inputs, outputs):
             p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
 
         p = Path(p)  # to Path
-        save_path = str(save_dir / p.name)  # img.jpg
-        txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
-        s += '%gx%g ' % img.shape[2:]  # print string
+        save_path = str(save_dir / p.name)  # im.jpg
+        txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
+        s += '%gx%g ' % im.shape[2:]  # print string
         gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
         imc = im0.copy() if save_crop else im0  # for save_crop
         annotator = Annotator(im0, line_width=line_thickness, example=str(names))
         if len(det):
             # Rescale boxes from img_size to im0 size
-            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
+            det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
 
             # Print results
             for c in det[:, -1].unique():
@@ -282,7 +204,7 @@ def wrap_frozen_graph(gd, inputs, outputs):
 
 def parse_opt():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pb', help='model path(s)')
     parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
     parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
     parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
diff --git a/models/common.py b/models/common.py
index 8035ef11a791..7f1ecfe0ee66 100644
--- a/models/common.py
+++ b/models/common.py
@@ -5,10 +5,12 @@
 
 import logging
 import math
+import platform
 import warnings
 from copy import copy
 from pathlib import Path
 
+import cv2
 import numpy as np
 import pandas as pd
 import requests
@@ -18,7 +20,8 @@
 from torch.cuda import amp
 
 from utils.datasets import exif_transpose, letterbox
-from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, scale_coords, xyxy2xywh
+from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, scale_coords, xyxy2xywh, \
+    check_suffix, check_requirements
 from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import time_sync
 
@@ -272,6 +275,95 @@ def forward(self, x):
         return torch.cat(x, self.d)
 
 
+class DetectMultiBackend(nn.Module):
+    def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False):
+        super().__init__()
+        # Load model
+        w = str(weights[0] if isinstance(weights, list) else weights)
+        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
+        check_suffix(w, suffixes)  # check weights have acceptable suffix
+        pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
+        stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
+        if pt:
+            from models.experimental import attempt_load
+            model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
+            stride = int(model.stride.max())  # model stride
+            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+            if half:
+                model.half()  # to FP16
+        elif onnx:
+            if dnn:  # OpenCV DNN
+                check_requirements(('opencv-python>=4.5.4',))
+                net = cv2.dnn.readNetFromONNX(w)
+            else:  # ONNX Runtime
+                check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
+                import onnxruntime
+                session = onnxruntime.InferenceSession(w, None)
+        else:  # TensorFlow model (TFLite, pb, saved_model)
+            import tensorflow as tf
+            if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
+                def wrap_frozen_graph(gd, inputs, outputs):
+                    x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
+                    return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
+                                   tf.nest.map_structure(x.graph.as_graph_element, outputs))
+
+                graph_def = tf.Graph().as_graph_def()
+                graph_def.ParseFromString(open(w, 'rb').read())
+                frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
+            elif saved_model:
+                model = tf.keras.models.load_model(w)
+            elif tflite:
+                if "edgetpu" in w:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
+                    import tflite_runtime.interpreter as tflri
+                    delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
+                                'Darwin': 'libedgetpu.1.dylib',
+                                'Windows': 'edgetpu.dll'}[platform.system()]
+                    interpreter = tflri.Interpreter(model_path=w,
+                                                    experimental_delegates=[tflri.load_delegate(delegate)])
+                else:
+                    interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
+                interpreter.allocate_tensors()  # allocate
+                input_details = interpreter.get_input_details()  # inputs
+                output_details = interpreter.get_output_details()  # outputs
+
+        self.__dict__.update(locals())  # all all variables to self
+
+    def forward(self, im, augment=False, profile=False, visualize=False):
+        # Inference
+        if self.pt:
+            y = self.model(im, augment=augment, visualize=visualize)[0]
+        elif self.onnx:
+            if self.dnn:  # OpenCV DNN
+                self.net.setInput(im)
+                y = self.net.forward()
+            else:  # ONNX Runtime
+                y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
+        else:  # TensorFlow model (TFLite, pb, saved_model)
+            import tensorflow as tf
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,640,640,3)
+            if self.pb:
+                y = self.frozen_func(x=tf.constant(im)).numpy()
+            elif self.saved_model:
+                y = self.model(im, training=False).numpy()
+            elif self.tflite:
+                input, output = self.input_details[0], self.output_details[0]
+                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
+                if int8:
+                    scale, zero_point = input['quantization']
+                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
+                self.interpreter.set_tensor(input['index'], im)
+                self.interpreter.invoke()
+                y = self.interpreter.get_tensor(output['index'])
+                if int8:
+                    scale, zero_point = output['quantization']
+                    y = (y.astype(np.float32) - zero_point) * scale  # re-scale
+            y[..., 0] *= im.shape[2]  # x
+            y[..., 1] *= im.shape[1]  # y
+            y[..., 2] *= im.shape[2]  # w
+            y[..., 3] *= im.shape[1]  # h
+        return y if self.pt else torch.tensor(y)
+
+
 class AutoShape(nn.Module):
     # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
     conf = 0.25  # NMS confidence threshold
diff --git a/utils/general.py b/utils/general.py
index 0f45d72498fe..86c7d90c8220 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -785,7 +785,8 @@ def print_mutation(results, hyp, save_dir, bucket):
 
 
 def apply_classifier(x, model, img, im0):
-    # Apply a second stage classifier to yolo outputs
+    # Apply a second stage classifier to YOLO outputs
+    # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval()
     im0 = [im0] if isinstance(im0, np.ndarray) else im0
     for i, d in enumerate(x):  # per image
         if d is not None and len(d):
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index b36e98d0b656..d0f143b1a30b 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -18,7 +18,6 @@
 import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-import torchvision
 
 from utils.general import LOGGER
 
@@ -237,25 +236,6 @@ def model_info(model, verbose=False, img_size=640):
     LOGGER.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
 
 
-def load_classifier(name='resnet101', n=2):
-    # Loads a pretrained model reshaped to n-class output
-    model = torchvision.models.__dict__[name](pretrained=True)
-
-    # ResNet model properties
-    # input_size = [3, 224, 224]
-    # input_space = 'RGB'
-    # input_range = [0, 1]
-    # mean = [0.485, 0.456, 0.406]
-    # std = [0.229, 0.224, 0.225]
-
-    # Reshape output to n classes
-    filters = model.fc.weight.shape[1]
-    model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
-    model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
-    model.fc.out_features = n
-    return model
-
-
 def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
     # scales img(bs,3,y,x) by ratio constrained to gs-multiple
     if ratio == 1.0:

From 0d9bc34be4141e85034c9b1924103d07499f6a2f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 7 Nov 2021 13:17:56 +0000
Subject: [PATCH 02/63] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 models/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/models/common.py b/models/common.py
index 7f1ecfe0ee66..b0606ddca897 100644
--- a/models/common.py
+++ b/models/common.py
@@ -20,8 +20,8 @@
 from torch.cuda import amp
 
 from utils.datasets import exif_transpose, letterbox
-from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, scale_coords, xyxy2xywh, \
-    check_suffix, check_requirements
+from utils.general import (check_requirements, check_suffix, colorstr, increment_path, make_divisible,
+                           non_max_suppression, scale_coords, xyxy2xywh)
 from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import time_sync
 

From 795fbe86baa2dd822f3a62be5dcd2e0d33ba70c6 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:20:08 +0100
Subject: [PATCH 03/63] pb to pt fix

---
 detect.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/detect.py b/detect.py
index 7080f83497fe..f5ec42e612e2 100644
--- a/detect.py
+++ b/detect.py
@@ -204,7 +204,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
 
 def parse_opt():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pb', help='model path(s)')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
     parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
     parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
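Note: after the three patches above, the new class can be exercised standalone. A minimal usage sketch, illustrative rather than part of the patch series, assuming a YOLOv5 checkout with these patches applied and a local yolov5s.pt:

import torch
from models.common import DetectMultiBackend
from utils.general import check_img_size
from utils.torch_utils import select_device

device = select_device('')  # CUDA if available, else CPU
model = DetectMultiBackend('yolov5s.pt', device=device, half=False, dnn=False)  # .pt/.onnx/.tflite/.pb/saved_model
stride, names = model.stride, model.names  # non-PyTorch backends fall back to stride 64 and 'class0'...'class999'
imgsz = check_img_size([640, 640], s=stride)  # verify size is a multiple of stride
im = torch.zeros(1, 3, *imgsz).to(device)  # dummy BCHW input scaled 0.0-1.0
pred = model(im)  # PyTorch backend returns y[0]; other backends return torch.tensor(y)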
From 9e1d0a6fd9108bb3f367202cad0a04c9ccebf1a8 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:34:27 +0100
Subject: [PATCH 04/63] Cleanup

---
 models/common.py | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/models/common.py b/models/common.py
index b0606ddca897..5f1b05e88fe9 100644
--- a/models/common.py
+++ b/models/common.py
@@ -277,28 +277,27 @@ def forward(self, x):
 
 class DetectMultiBackend(nn.Module):
     def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False):
+        # MultiBackend model load
         super().__init__()
-        # Load model
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
         pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
         if pt:
-            from models.experimental import attempt_load
+            from models.experimental import attempt_load  # scoped to avoid circular import
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
             if half:
                 model.half()  # to FP16
-        elif onnx:
-            if dnn:  # OpenCV DNN
-                check_requirements(('opencv-python>=4.5.4',))
-                net = cv2.dnn.readNetFromONNX(w)
-            else:  # ONNX Runtime
-                check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
-                import onnxruntime
-                session = onnxruntime.InferenceSession(w, None)
+        elif dnn:  # ONNX OpenCV DNN
+            check_requirements(('opencv-python>=4.5.4',))
+            net = cv2.dnn.readNetFromONNX(w)
+        elif onnx:  # ONNX Runtime
+            check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
+            import onnxruntime
+            session = onnxruntime.InferenceSession(w, None)
         else:  # TensorFlow model (TFLite, pb, saved_model)
             import tensorflow as tf
             if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
@@ -325,24 +324,21 @@ def wrap_frozen_graph(gd, inputs, outputs):
                 interpreter.allocate_tensors()  # allocate
                 input_details = interpreter.get_input_details()  # inputs
                 output_details = interpreter.get_output_details()  # outputs
+        self.__dict__.update(locals())  # assign all variables to self
 
-        self.__dict__.update(locals())  # all all variables to self
-
-    def forward(self, im, augment=False, profile=False, visualize=False):
-        # Inference
+    def forward(self, im, augment=False, visualize=False):
+        # MultiBackend inference
         if self.pt:
-            y = self.model(im, augment=augment, visualize=visualize)[0]
-        elif self.onnx:
-            if self.dnn:  # OpenCV DNN
-                self.net.setInput(im)
-                y = self.net.forward()
-            else:  # ONNX Runtime
-                y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
+            return self.model(im, augment=augment, visualize=visualize)[0]
+        elif self.dnn:  # ONNX OpenCV DNN
+            self.net.setInput(im)
+            y = self.net.forward()
+        elif self.onnx:  # ONNX Runtime
+            y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
-            import tensorflow as tf
             im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,640,640,3)
             if self.pb:
-                y = self.frozen_func(x=tf.constant(im)).numpy()
+                y = self.frozen_func(x=self.tf.constant(im)).numpy()
             elif self.saved_model:
                 y = self.model(im, training=False).numpy()
             elif self.tflite:
@@ -361,7 +357,7 @@ def forward(self, im, augment=False, profile=False, visualize=False):
             y[..., 1] *= im.shape[1]  # y
             y[..., 2] *= im.shape[2]  # w
             y[..., 3] *= im.shape[1]  # h
-        return y if self.pt else torch.tensor(y)
+        return torch.tensor(y)

From 4f775882510b3e390f20cc8f380808091cb880ea Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:36:57 +0100
Subject: [PATCH 05/63] explicit apply_classifier path

---
 detect.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/detect.py b/detect.py
index f5ec42e612e2..b93bc521c332 100644
--- a/detect.py
+++ b/detect.py
@@ -122,7 +122,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
         dt[2] += time_sync() - t3
 
         # Second-stage classifier (optional)
-        # pred = apply_classifier(pred, classifier_model, im, im0s)
+        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
 
         # Process predictions
         for i, det in enumerate(pred):  # per image

From 96c9462825469de16438af278ceb334193aa8930 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:40:19 +0100
Subject: [PATCH 06/63] Cleanup2

---
 models/common.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/models/common.py b/models/common.py
index 5f1b05e88fe9..2ea7088530e9 100644
--- a/models/common.py
+++ b/models/common.py
@@ -312,13 +312,12 @@ def wrap_frozen_graph(gd, inputs, outputs):
         elif saved_model:
             model = tf.keras.models.load_model(w)
         elif tflite:
-            if "edgetpu" in w:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
-                import tflite_runtime.interpreter as tflri
+            if 'edgetpu' in w:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
+                import tflite_runtime.interpreter as tfli
                 delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
                             'Darwin': 'libedgetpu.1.dylib',
                             'Windows': 'edgetpu.dll'}[platform.system()]
-                interpreter = tflri.Interpreter(model_path=w,
-                                                experimental_delegates=[tflri.load_delegate(delegate)])
+                interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
             else:
                 interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
             interpreter.allocate_tensors()  # allocate

From 398d37788828f8b9b0c9e65016b98f6652af2627 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:41:32 +0100
Subject: [PATCH 07/63] Cleanup3

---
 models/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/models/common.py b/models/common.py
index 2ea7088530e9..7359043cb805 100644
--- a/models/common.py
+++ b/models/common.py
@@ -311,8 +311,8 @@ def wrap_frozen_graph(gd, inputs, outputs):
             frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
         elif saved_model:
             model = tf.keras.models.load_model(w)
-        elif tflite:
-            if 'edgetpu' in w:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
+        elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
+            if 'edgetpu' in w.lower():
                 import tflite_runtime.interpreter as tfli
                 delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
                             'Darwin': 'libedgetpu.1.dylib',

From 89bf2f1bf4ee8cbf17038bf94348f2a93ca9c33a Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:45:06 +0100
Subject: [PATCH 08/63] Cleanup4

---
 models/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/models/common.py b/models/common.py
index 7359043cb805..b309705da9b0 100644
--- a/models/common.py
+++ b/models/common.py
@@ -276,8 +276,8 @@ def forward(self, x):
 
 
 class DetectMultiBackend(nn.Module):
+    # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN
     def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False):
-        # MultiBackend model load
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
@@ -326,7 +326,7 @@ def wrap_frozen_graph(gd, inputs, outputs):
         self.__dict__.update(locals())  # assign all variables to self
 
     def forward(self, im, augment=False, visualize=False):
-        # MultiBackend inference
+        # YOLOv5 MultiBackend inference
         if self.pt:
             return self.model(im, augment=augment, visualize=visualize)[0]
         elif self.dnn:  # ONNX OpenCV DNN

From 47550b03baf225f826e27ddba5dd5c26b1f48b80 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:47:41 +0100
Subject: [PATCH 09/63] Cleanup5

---
 detect.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/detect.py b/detect.py
index b93bc521c332..a216ba2a59ca 100644
--- a/detect.py
+++ b/detect.py
@@ -81,7 +81,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     # Load model
     model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn)
     stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
-    imgsz = check_img_size(imgsz, s=model.stride)  # check image size
+    imgsz = check_img_size(imgsz, s=stride)  # check image size
 
     # Dataloader
     if webcam:
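For reference, the suffix check at the top of __init__ is what routes a weights path to a backend. A condensed illustration, using a hypothetical file name:

from pathlib import Path

suffixes = ['.pt', '.onnx', '.tflite', '.pb', '']  # '' matches TensorFlow saved_model directories
suffix = Path('yolov5s.onnx').suffix.lower()  # '.onnx'
pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
print(onnx)  # True, so ONNX Runtime is used (or OpenCV DNN when dnn=True)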
From d08b356c72b70c5c48865c72fe4a02b86706fd97 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 15:33:48 +0100
Subject: [PATCH 10/63] Cleanup6

---
 models/common.py | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/models/common.py b/models/common.py
index b309705da9b0..a050521aa8e0 100644
--- a/models/common.py
+++ b/models/common.py
@@ -277,14 +277,14 @@ def forward(self, x):
 
 class DetectMultiBackend(nn.Module):
     # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN
-    def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False):
+    def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=True):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
         pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
-        if pt:
+        if pt:  # PyTorch
             from models.experimental import attempt_load  # scoped to avoid circular import
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
             if half:
                 model.half()  # to FP16
         elif dnn:  # ONNX OpenCV DNN
+            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
             check_requirements(('opencv-python>=4.5.4',))
             net = cv2.dnn.readNetFromONNX(w)
         elif onnx:  # ONNX Runtime
+            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
             check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
             import onnxruntime
             session = onnxruntime.InferenceSession(w, None)
@@ -306,34 +308,41 @@ def wrap_frozen_graph(gd, inputs, outputs):
                     return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                                    tf.nest.map_structure(x.graph.as_graph_element, outputs))
 
+                LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...')
                 graph_def = tf.Graph().as_graph_def()
                 graph_def.ParseFromString(open(w, 'rb').read())
                 frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
             elif saved_model:
+                LOGGER.info(f'Loading {w} for TensorFlow saved_model inference...')
                 model = tf.keras.models.load_model(w)
             elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
                 if 'edgetpu' in w.lower():
+                    LOGGER.info(f'Loading {w} for TensorFlow Edge TPU inference...')
                     import tflite_runtime.interpreter as tfli
                     delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
                                 'Darwin': 'libedgetpu.1.dylib',
                                 'Windows': 'edgetpu.dll'}[platform.system()]
                     interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
                 else:
+                    LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                     interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
                 interpreter.allocate_tensors()  # allocate
                 input_details = interpreter.get_input_details()  # inputs
                 output_details = interpreter.get_output_details()  # outputs
         self.__dict__.update(locals())  # assign all variables to self
 
-    def forward(self, im, augment=False, visualize=False):
+    def forward(self, im, augment=False, visualize=False, val=False):
         # YOLOv5 MultiBackend inference
-        if self.pt:
-            return self.model(im, augment=augment, visualize=visualize)[0]
-        elif self.dnn:  # ONNX OpenCV DNN
-            self.net.setInput(im)
-            y = self.net.forward()
-        elif self.onnx:  # ONNX Runtime
-            y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
+        if self.pt:  # PyTorch
+            y = self.model(im, augment=augment, visualize=visualize)
+            return y if val else y[0]
+        elif self.onnx:  # ONNX
+            im = np.array(im)
+            if self.dnn:  # ONNX OpenCV DNN
+                self.net.setInput(im)
+                y = self.net.forward()
+            else:  # ONNX Runtime
+                y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
             im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,640,640,3)
             if self.pb:
                 y = self.frozen_func(x=self.tf.constant(im)).numpy()
             elif self.saved_model:
                 y = self.model(im, training=False).numpy()
             elif self.tflite:
                 input, output = self.input_details[0], self.output_details[0]
                 int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
                 if int8:
                     scale, zero_point = input['quantization']
                     im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                 self.interpreter.set_tensor(input['index'], im)
                 self.interpreter.invoke()
                 y = self.interpreter.get_tensor(output['index'])
                 if int8:
                     scale, zero_point = output['quantization']
                     y = (y.astype(np.float32) - zero_point) * scale  # re-scale
             y[..., 0] *= im.shape[2]  # x
             y[..., 1] *= im.shape[1]  # y
             y[..., 2] *= im.shape[2]  # w
             y[..., 3] *= im.shape[1]  # h
-        return torch.tensor(y)
+        y = torch.tensor(y)
+        return (y, []) if val else y

From 407d5d367aaa89b83ca977fa36d961ff1e9fda3c Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 15:45:55 +0100
Subject: [PATCH 11/63] val.py MultiBackend inference

---
 val.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/val.py b/val.py
index 2118ad400ac7..6fd96e5ee4f7 100644
--- a/val.py
+++ b/val.py
@@ -23,10 +23,10 @@
 sys.path.append(str(ROOT))  # add ROOT to PATH
 ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
 
-from models.experimental import attempt_load
+from models.common import DetectMultiBackend
 from utils.callbacks import Callbacks
 from utils.datasets import create_dataloader
-from utils.general import (LOGGER, box_iou, check_dataset, check_img_size, check_requirements, check_suffix, check_yaml,
+from utils.general import (LOGGER, box_iou, check_dataset, check_img_size, check_requirements, check_yaml,
                            coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args,
                            scale_coords, xywh2xyxy, xyxy2xywh)
 from utils.metrics import ConfusionMatrix, ap_per_class
@@ -100,6 +100,7 @@ def run(data,
         name='exp',  # save to project/name
         exist_ok=False,  # existing project/name ok, do not increment
         half=True,  # use FP16 half-precision inference
+        dnn=False,  # use OpenCV DNN for ONNX inference
         model=None,
         dataloader=None,
         save_dir=Path(''),
@@ -120,10 +121,12 @@ def run(data,
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
 
     # Load model
-    check_suffix(weights, '.pt')
-    model = attempt_load(weights, map_location=device)  # load FP32 model
-    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
-    imgsz = check_img_size(imgsz, s=gs)  # check image size
+    model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn)
+    stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
+    imgsz = check_img_size(imgsz, s=model.stride)  # check image size
+    if not model.pt:
+        LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
+        batch_size = 1  # export.py models default to batch-size 1
 
     # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
     # if device.type != 'cpu' and torch.cuda.device_count() > 1:
     #     model = nn.DataParallel(model)
@@ -146,10 +149,10 @@ def run(data,
     # Dataloader
     if not training:
         if device.type != 'cpu':
-            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
+            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters())))  # run once
         pad = 0.0 if task == 'speed' else 0.5
         task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
-        dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=pad, rect=True,
+        dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=model.pt,
                                        prefix=colorstr(f'{task}: '))[0]
 
     seen = 0
@@ -171,7 +174,7 @@ def run(data,
         dt[0] += t2 - t1
 
         # Run model
-        out, train_out = model(img, augment=augment)  # inference and training outputs
+        out, train_out = model(img, augment=augment, val=True)  # inference and training outputs
         dt[1] += time_sync() - t2
 
         # Compute loss
@@ -318,6 +321,7 @@ def parse_opt():
     parser.add_argument('--name', default='exp', help='save to project/name')
     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
     parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
+    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
     opt = parser.parse_args()
     opt.data = check_yaml(opt.data)  # check YAML
     opt.save_json |= opt.data.endswith('coco.yaml')
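With PATCH 11 the same wrapper drives validation, so exported models can be scored directly. A sketch of the programmatic entry point, assuming an exported yolov5s.onnx and the bundled coco128.yaml; note the forced batch size 1 for non-PyTorch backends logged above:

import val  # YOLOv5 val.py

results = val.run(data='data/coco128.yaml',
                  weights='yolov5s.onnx',  # suffix selects the backend
                  imgsz=640,
                  half=False,  # FP16 is a PyTorch-on-CUDA feature only
                  dnn=False)  # True would route ONNX through OpenCV DNN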
From 71b320aa836aa86706ac89ff255181c76d301bea Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 15:53:30 +0100
Subject: [PATCH 12/63] warmup fix

---
 detect.py | 2 +-
 val.py    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/detect.py b/detect.py
index a216ba2a59ca..a110dd5c63ad 100644
--- a/detect.py
+++ b/detect.py
@@ -96,7 +96,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
 
     # Run inference
     if pt and device.type != 'cpu':
-        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # run once
+        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
     dt, seen = [0.0, 0.0, 0.0], 0
     for path, im, im0s, vid_cap, s in dataset:
         t1 = time_sync()
diff --git a/val.py b/val.py
index 6fd96e5ee4f7..95fcbeea4eb5 100644
--- a/val.py
+++ b/val.py
@@ -148,8 +148,8 @@ def run(data,
 
     # Dataloader
     if not training:
-        if device.type != 'cpu':
-            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters())))  # run once
+        if model.pt and device.type != 'cpu':
+            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
         pad = 0.0 if task == 'speed' else 0.5
         task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
         dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=model.pt,

From 293e98da9117529516ca75c7cacaff6ff8306fd8 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 15:57:51 +0100
Subject: [PATCH 13/63] to device fix

---
 val.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/val.py b/val.py
index 95fcbeea4eb5..cf148cf867f0 100644
--- a/val.py
+++ b/val.py
@@ -163,18 +163,21 @@ def run(data,
     dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
     loss = torch.zeros(3, device=device)
     jdict, stats, ap, ap_class = [], [], [], []
-    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
+    for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
         t1 = time_sync()
-        img = img.to(device, non_blocking=True)
-        img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255  # 0 - 255 to 0.0 - 1.0
+        if model.onnx:
+            im = np.array(im).astype('float32')
+        else:
+            im = im.to(device, non_blocking=True)
+            im = im.half() if half else im.float()  # uint8 to fp16/32
+        im /= 255  # 0 - 255 to 0.0 - 1.0
         targets = targets.to(device)
-        nb, _, height, width = img.shape  # batch size, channels, height, width
+        nb, _, height, width = im.shape  # batch size, channels, height, width
         t2 = time_sync()
         dt[0] += t2 - t1
 
         # Run model
-        out, train_out = model(img, augment=augment, val=True)  # inference and training outputs
+        out, train_out = model(im, augment=augment, val=True)  # inference and training outputs
         dt[1] += time_sync() - t2
 
         # Compute loss
@@ -205,12 +208,12 @@ def run(data,
             if single_cls:
                 pred[:, 5] = 0
             predn = pred.clone()
-            scale_coords(img[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
+            scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
 
             # Evaluate
             if nl:
                 tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
-                scale_coords(img[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
+                scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
                 labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
                 correct = process_batch(predn, labelsn, iouv)
                 if plots:
@@ -224,14 +227,14 @@ def run(data,
                 save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt'))
             if save_json:
                 save_one_json(predn, jdict, path, class_map)  # append to COCO-JSON dictionary
            callbacks.run('on_val_image_end', pred, predn, path, names, im[si])
 
         # Plot images
         if plots and batch_i < 3:
             f = save_dir / f'val_batch{batch_i}_labels.jpg'  # labels
-            Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
+            Thread(target=plot_images, args=(im, targets, paths, f, names), daemon=True).start()
             f = save_dir / f'val_batch{batch_i}_pred.jpg'  # predictions
-            Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()
+            Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start()
 
     # Compute statistics
     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy

From 201107ebc305327674cddf99811e89ba4b3085cd Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:00:01 +0100
Subject: [PATCH 14/63] pt fix

---
 val.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/val.py b/val.py
index cf148cf867f0..5e466d4f208b 100644
--- a/val.py
+++ b/val.py
@@ -111,7 +111,7 @@ def run(data,
     # Initialize/load model and set device
     training = model is not None
     if training:  # called by train.py
-        device = next(model.parameters()).device  # get model device
+        device, pt = next(model.parameters()).device, True  # get model device, PyTorch model
 
     else:  # called directly
         device = select_device(device, batch_size=batch_size)
@@ -165,13 +165,11 @@ def run(data,
     jdict, stats, ap, ap_class = [], [], [], []
     for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
         t1 = time_sync()
-        if model.onnx:
-            im = np.array(im).astype('float32')
-        else:
+        if pt:
             im = im.to(device, non_blocking=True)
             im = im.half() if half else im.float()  # uint8 to fp16/32
+            targets = targets.to(device)
         im /= 255  # 0 - 255 to 0.0 - 1.0
-        targets = targets.to(device)
         nb, _, height, width = im.shape  # batch size, channels, height, width
         t2 = time_sync()
         dt[0] += t2 - t1

From a7f17e940c5db085f862ea4abdbcc8f8faea0e96 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:11:15 +0100
Subject: [PATCH 15/63] device fix

---
 val.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/val.py b/val.py
index 5e466d4f208b..8337a901f972 100644
--- a/val.py
+++ b/val.py
@@ -127,6 +127,7 @@ def run(data,
     if not model.pt:
         LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
         batch_size = 1  # export.py models default to batch-size 1
+        device = torch.device('cpu')
 
     # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
     # if device.type != 'cpu' and torch.cuda.device_count() > 1:
@@ -169,6 +170,8 @@ def run(data,
             im = im.to(device, non_blocking=True)
             im = im.half() if half else im.float()  # uint8 to fp16/32
             targets = targets.to(device)
+        else:
+            im = im.numpy().astype('float32')
         im /= 255  # 0 - 255 to 0.0 - 1.0
         nb, _, height, width = im.shape  # batch size, channels, height, width
         t2 = time_sync()
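Patches 13-15 settle what the val.py batch loop feeds each backend: device tensors for PyTorch, CPU float32 numpy arrays otherwise. A condensed restatement of the loop body at this point in the series, as an illustrative sketch rather than a new hunk:

def preprocess_batch(im, targets, pt, device, half):
    # im arrives as a batched uint8 tensor from the dataloader
    if pt:
        im = im.to(device, non_blocking=True)
        im = im.half() if half else im.float()  # uint8 to fp16/32
        targets = targets.to(device)
    else:
        im = im.numpy().astype('float32')  # exported backends stay on CPU numpy
    im /= 255  # 0 - 255 to 0.0 - 1.0
    return im, targets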
From 3cf44c32c3fc283c494414e23a4e63342d395d3d Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:18:15 +0100
Subject: [PATCH 16/63] Val cleanup

---
 val.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/val.py b/val.py
index 8337a901f972..c25fb4b4b9c8 100644
--- a/val.py
+++ b/val.py
@@ -137,7 +137,7 @@ def run(data,
     data = check_dataset(data)  # check
 
     # Half
-    half &= device.type != 'cpu'  # half precision only supported on CUDA
+    half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
     model.half() if half else model.float()
 
     # Configure
@@ -149,7 +149,7 @@ def run(data,
 
     # Dataloader
     if not training:
-        if model.pt and device.type != 'cpu':
+        if pt and device.type != 'cpu':
             model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
         pad = 0.0 if task == 'speed' else 0.5
         task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
         dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=model.pt,
@@ -166,12 +166,12 @@ def run(data,
     jdict, stats, ap, ap_class = [], [], [], []
     for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
         t1 = time_sync()
+        im = im.half() if half else im.float()  # uint8 to fp16/32
         if pt:
             im = im.to(device, non_blocking=True)
-            im = im.half() if half else im.float()  # uint8 to fp16/32
             targets = targets.to(device)
-        else:
-            im = im.numpy().astype('float32')
+        # else:
+        #     im = im.numpy().astype('float32')
         im /= 255  # 0 - 255 to 0.0 - 1.0
         nb, _, height, width = im.shape  # batch size, channels, height, width
         t2 = time_sync()

From d32ca2e8c28ac8574312551372b5b7555a2173a2 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:21:47 +0100
Subject: [PATCH 17/63] COCO128 URL to assets

---
 data/coco128.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/coco128.yaml b/data/coco128.yaml
index b1dfb004afa1..84a91b18359d 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -27,4 +27,4 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't
 
 
 # Download script/URL (optional)
-download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
+download: https://ultralytics.com/assets/coco128.zip

From 5f3a5fb45d44ecf2f801f0e53688d4a11e38c9c2 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:41:24 +0100
Subject: [PATCH 18/63] half fix

---
 detect.py        | 8 +++-----
 models/common.py | 4 ++--
 val.py           | 6 ++----
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/detect.py b/detect.py
index a110dd5c63ad..34b2095a789b 100644
--- a/detect.py
+++ b/detect.py
@@ -79,7 +79,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     half &= device.type != 'cpu'  # half precision only supported on CUDA
 
     # Load model
-    model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn)
+    model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn)
     stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
     imgsz = check_img_size(imgsz, s=stride)  # check image size
 
@@ -100,11 +100,9 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     dt, seen = [0.0, 0.0, 0.0], 0
     for path, im, im0s, vid_cap, s in dataset:
         t1 = time_sync()
-        if onnx:
-            im = im.astype('float32')
-        else:
+        if pt:
             im = torch.from_numpy(im).to(device)
-        im = im.half() if half else im.float()  # uint8 to fp16/32
+            im = im.half() if half else im.float()  # uint8 to fp16/32
         im /= 255  # 0 - 255 to 0.0 - 1.0
         if len(im.shape) == 3:
             im = im[None]  # expand for batch dim
diff --git a/models/common.py b/models/common.py
index a050521aa8e0..9fa83a091434 100644
--- a/models/common.py
+++ b/models/common.py
@@ -277,7 +277,7 @@ def forward(self, x):
 
 class DetectMultiBackend(nn.Module):
     # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN
-    def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=True):
+    def __init__(self, weights='yolov5s.pt', device=None, pt_half=False, dnn=True):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
@@ -289,7 +289,7 @@ def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=True):
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-            if half:
+            if pt_half:
                 model.half()  # to FP16
         elif dnn:  # ONNX OpenCV DNN
diff --git a/val.py b/val.py
index c25fb4b4b9c8..d87b4e34f167 100644
--- a/val.py
+++ b/val.py
@@ -121,7 +121,7 @@ def run(data,
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
 
     # Load model
-    model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn)
+    model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn)
     stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
     imgsz = check_img_size(imgsz, s=model.stride)  # check image size
     if not pt:
@@ -166,12 +166,10 @@ def run(data,
     jdict, stats, ap, ap_class = [], [], [], []
     for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
         t1 = time_sync()
-        im = im.half() if half else im.float()  # uint8 to fp16/32
         if pt:
             im = im.to(device, non_blocking=True)
             targets = targets.to(device)
-        # else:
-        #     im = im.numpy().astype('float32')
+        im = im.half() if half else im.float()  # uint8 to fp16/32
         im /= 255  # 0 - 255 to 0.0 - 1.0
         nb, _, height, width = im.shape  # batch size, channels, height, width
         t2 = time_sync()

From 901566269d047767a60609b9d2b273f8b1dcaf92 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:47:58 +0100
Subject: [PATCH 19/63] detect fix

---
 detect.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/detect.py b/detect.py
index 34b2095a789b..19102c77ce8d 100644
--- a/detect.py
+++ b/detect.py
@@ -102,7 +102,9 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
         t1 = time_sync()
         if pt:
             im = torch.from_numpy(im).to(device)
-        im = im.half() if half else im.float()  # uint8 to fp16/32
+            im = im.half() if half else im.float()  # uint8 to fp16/32
+        else:
+            im = im.astype('float32')
         im /= 255  # 0 - 255 to 0.0 - 1.0
         if len(im.shape) == 3:
             im = im[None]  # expand for batch dim

From dc5c37012f0bf99c4452e397ccdc751c93229b4e Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:55:38 +0100
Subject: [PATCH 20/63] detect fix 2

---
 detect.py        | 7 ++-----
 models/common.py | 2 +-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/detect.py b/detect.py
index 19102c77ce8d..8cb8e469b20a 100644
--- a/detect.py
+++ b/detect.py
@@ -100,11 +100,8 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     dt, seen = [0.0, 0.0, 0.0], 0
     for path, im, im0s, vid_cap, s in dataset:
         t1 = time_sync()
-        if pt:
-            im = torch.from_numpy(im).to(device)
-            im = im.half() if half else im.float()  # uint8 to fp16/32
-        else:
-            im = im.astype('float32')
+        im = torch.from_numpy(im).to(device)
+        im = im.half() if half else im.float()  # uint8 to fp16/32
         im /= 255  # 0 - 255 to 0.0 - 1.0
         if len(im.shape) == 3:
             im = im[None]  # expand for batch dim
diff --git a/models/common.py b/models/common.py
index 7032dc3be296..ee2fa6f89988 100644
--- a/models/common.py
+++ b/models/common.py
@@ -337,7 +337,7 @@ def forward(self, im, augment=False, visualize=False, val=False):
             y = self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
         elif self.onnx:  # ONNX
-            im = np.array(im)
+            im = im.cpu().numpy()  # torch to numpy
             if self.dnn:  # ONNX OpenCV DNN
                 self.net.setInput(im)
                 y = self.net.forward()

From 77fbc8f1708d5c33ff4fb0cd5cf38a7ff47e2f5d Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:05:19 +0100
Subject: [PATCH 21/63] remove half from DetectMultiBackend

---
 detect.py        | 11 ++++++-----
 models/common.py |  4 +---
 val.py           |  4 ++--
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/detect.py b/detect.py
index 8cb8e469b20a..865104c32504 100644
--- a/detect.py
+++ b/detect.py
@@ -74,15 +74,16 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
 
-    # Initialize
-    device = select_device(device)
-    half &= device.type != 'cpu'  # half precision only supported on CUDA
-
     # Load model
-    model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn)
+    device = select_device(device)
+    model = DetectMultiBackend(weights, device=device, dnn=dnn)
     stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
     imgsz = check_img_size(imgsz, s=stride)  # check image size
 
+    # Half
+    half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+    model.half() if half else model.float()
+
     # Dataloader
     if webcam:
         view_img = check_imshow()
diff --git a/models/common.py b/models/common.py
index 7032dc3be296..ee2fa6f89988 100644
--- a/models/common.py
+++ b/models/common.py
@@ -277,7 +277,7 @@ def forward(self, x):
 
 class DetectMultiBackend(nn.Module):
     # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN
-    def __init__(self, weights='yolov5s.pt', device=None, pt_half=False, dnn=True):
+    def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
@@ -289,8 +289,6 @@ def __init__(self, weights='yolov5s.pt', device=None, pt_half=False, dnn=True):
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-            if pt_half:
-                model.half()  # to FP16
         elif dnn:  # ONNX OpenCV DNN
             LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
             check_requirements(('opencv-python>=4.5.4',))
diff --git a/val.py b/val.py
index d87b4e34f167..ded19e7b0daf 100644
--- a/val.py
+++ b/val.py
@@ -121,10 +121,10 @@ def run(data,
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
 
     # Load model
-    model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn)
+    model = DetectMultiBackend(weights, device=device, dnn=dnn)
     stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
     imgsz = check_img_size(imgsz, s=model.stride)  # check image size
     if not pt:
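With FP16 handling removed from the constructor, casting becomes the caller's responsibility. The next few patches iterate on the details; condensed, the pattern the series converges on looks like this (illustrative sketch only, see the 'training half handling' and 'Cleanup' patches that follow):

import torch
from models.common import DetectMultiBackend
from utils.torch_utils import select_device

device = select_device('')
model = DetectMultiBackend('yolov5s.pt', device=device, dnn=False)
half = model.pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
if model.pt:
    model.model.half() if half else model.model.float()  # cast the wrapped module, not the wrapper
im = torch.zeros(1, 3, 640, 640).to(device)
im = im.half() if half else im.float()  # input dtype must match the model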
training: # called by train.py device, pt = next(model.parameters()).device, True # get model device, PyTorch model + half &= pt and device.type != 'cpu' # half precision only supported on CUDA + model.half() if half else model.float() else: # called directly device = select_device(device, batch_size=batch_size) @@ -129,17 +131,12 @@ def run(data, batch_size = 1 # export.py models default to batch-size 1 device = torch.device('cpu') - # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 - # if device.type != 'cpu' and torch.cuda.device_count() > 1: - # model = nn.DataParallel(model) + half &= pt and device.type != 'cpu' # half precision only supported on CUDA + model.model.half() if half else model.model.float() # Data data = check_dataset(data) # check - # Half - half &= pt and device.type != 'cpu' # half precision only supported on CUDA - model.half() if half else model.float() - # Configure model.eval() is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt') # COCO dataset From e165bd4367fcf33949a9feea5c9771d9b2ad2858 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 17:15:20 +0100 Subject: [PATCH 23/63] training half handling 2 --- val.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/val.py b/val.py index 4195f2a296fe..39c03a499059 100644 --- a/val.py +++ b/val.py @@ -113,7 +113,7 @@ def run(data, if training: # called by train.py device, pt = next(model.parameters()).device, True # get model device, PyTorch model - half &= pt and device.type != 'cpu' # half precision only supported on CUDA + half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() else: # called directly device = select_device(device, batch_size=batch_size) @@ -126,14 +126,14 @@ def run(data, model = DetectMultiBackend(weights, device=device, dnn=dnn) stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx imgsz = check_img_size(imgsz, s=model.stride) # check image size - if not pt: + if pt: + half &= device.type != 'cpu' # half precision only supported on CUDA + model.model.half() if half else model.model.float() + else: LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends') batch_size = 1 # export.py models default to batch-size 1 device = torch.device('cpu') - half &= pt and device.type != 'cpu' # half precision only supported on CUDA - model.model.half() if half else model.model.float() - # Data data = check_dataset(data) # check From c743bb645a3b83e5f04daee3f084a9bc467acddd Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 17:23:34 +0100 Subject: [PATCH 24/63] training half handling 3 --- detect.py | 3 ++- val.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/detect.py b/detect.py index 865104c32504..bee873405b2d 100644 --- a/detect.py +++ b/detect.py @@ -82,7 +82,8 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) # Half half &= pt and device.type != 'cpu' # half precision only supported on CUDA - model.half() if half else model.float() + if pt: + model.model.half() if half else model.model.float() # Dataloader if webcam: diff --git a/val.py b/val.py index 39c03a499059..1df03f21776a 100644 --- a/val.py +++ b/val.py @@ -126,13 +126,14 @@ def run(data, model = DetectMultiBackend(weights, device=device, dnn=dnn) stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx imgsz = check_img_size(imgsz, 
s=model.stride) # check image size + half &= pt and device.type != 'cpu' # half precision only supported on CUDA if pt: - half &= device.type != 'cpu' # half precision only supported on CUDA model.model.half() if half else model.model.float() else: - LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends') + half = False batch_size = 1 # export.py models default to batch-size 1 device = torch.device('cpu') + LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends') # Data data = check_dataset(data) # check From f312ab63d5dafe2c309a00fb9eb7d3028c861d5f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 17:29:51 +0100 Subject: [PATCH 25/63] Cleanup --- detect.py | 2 +- val.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/detect.py b/detect.py index bee873405b2d..2da09edc78ae 100644 --- a/detect.py +++ b/detect.py @@ -81,7 +81,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) imgsz = check_img_size(imgsz, s=stride) # check image size # Half - half &= pt and device.type != 'cpu' # half precision only supported on CUDA + half &= pt and device.type != 'cpu' # half precision only supported by PyTorch on CUDA if pt: model.model.half() if half else model.model.float() diff --git a/val.py b/val.py index 1df03f21776a..da25017621cb 100644 --- a/val.py +++ b/val.py @@ -126,7 +126,7 @@ def run(data, model = DetectMultiBackend(weights, device=device, dnn=dnn) stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx imgsz = check_img_size(imgsz, s=model.stride) # check image size - half &= pt and device.type != 'cpu' # half precision only supported on CUDA + half &= pt and device.type != 'cpu' # half precision only supported by PyTorch on CUDA if pt: model.model.half() if half else model.model.float() else: From bdde9ef30ebac88aad4df0ce5f635d4178f0c2df Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 17:35:33 +0100 Subject: [PATCH 26/63] Fix CI error --- val.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/val.py b/val.py index da25017621cb..09a662d9c722 100644 --- a/val.py +++ b/val.py @@ -173,22 +173,22 @@ def run(data, t2 = time_sync() dt[0] += t2 - t1 - # Run model - out, train_out = model(im, augment=augment, val=True) # inference and training outputs + # Inference + out, train_out = model(im) if training else model(im, augment=augment, val=True) # inference, loss outputs dt[1] += time_sync() - t2 - # Compute loss + # Loss if compute_loss: loss += compute_loss([x.float() for x in train_out], targets)[1] # box, obj, cls - # Run NMS + # NMS targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t3 = time_sync() out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls) dt[2] += time_sync() - t3 - # Statistics per image + # Metrics for si, pred in enumerate(out): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) @@ -233,7 +233,7 @@ def run(data, f = save_dir / f'val_batch{batch_i}_pred.jpg' # predictions Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start() - # Compute statistics + # Compute metrics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, 
From bdde9ef30ebac88aad4df0ce5f635d4178f0c2df Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:35:33 +0100
Subject: [PATCH 26/63] Fix CI error

---
 val.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/val.py b/val.py
index da25017621cb..09a662d9c722 100644
--- a/val.py
+++ b/val.py
@@ -173,22 +173,22 @@ def run(data,
         t2 = time_sync()
         dt[0] += t2 - t1
 
-        # Run model
-        out, train_out = model(im, augment=augment, val=True)  # inference and training outputs
+        # Inference
+        out, train_out = model(im) if training else model(im, augment=augment, val=True)  # inference, loss outputs
         dt[1] += time_sync() - t2
 
-        # Compute loss
+        # Loss
         if compute_loss:
             loss += compute_loss([x.float() for x in train_out], targets)[1]  # box, obj, cls
 
-        # Run NMS
+        # NMS
         targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
         lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
         t3 = time_sync()
         out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
         dt[2] += time_sync() - t3
 
-        # Statistics per image
+        # Metrics
         for si, pred in enumerate(out):
             labels = targets[targets[:, 0] == si, 1:]
             nl = len(labels)
@@ -233,7 +233,7 @@ def run(data,
             f = save_dir / f'val_batch{batch_i}_pred.jpg'  # predictions
             Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start()
 
-    # Compute statistics
+    # Compute metrics
     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
     if len(stats) and stats[0].any():
         p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)

From 70039397cab8515d55ea998bb25ff4a2d6f7c95e Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 23:43:01 +0100
Subject: [PATCH 27/63] Add torchscript _extra_files

---
 export.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/export.py b/export.py
index f5eb487045b0..74fa67c99e32 100644
--- a/export.py
+++ b/export.py
@@ -21,6 +21,7 @@
 """
 
 import argparse
+import json
 import os
 import subprocess
 import sys
@@ -54,7 +55,9 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:'
 
         f = file.with_suffix('.torchscript.pt')
         ts = torch.jit.trace(model, im, strict=False)
-        (optimize_for_mobile(ts) if optimize else ts).save(f)
+        dict = {"im_shape": im.shape, "stride": int(max(model.stride)), "device": next(model.parameters()).device.type}
+        extra_files = {'config.txt': json.dumps(dict)}  # torch._C.ExtraFilesMap()
+        (optimize_for_mobile(ts) if optimize else ts).save(f, _extra_files=extra_files)
 
         LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
     except Exception as e:
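
Patch 27 piggybacks arbitrary metadata on the TorchScript archive through `_extra_files`, and the next patch reads it back at load time. A self-contained round-trip of that mechanism; the toy module, file name and metadata values are placeholders, not from the patch:

import json
import torch
import torch.nn as nn

m = nn.Sequential(nn.Conv2d(3, 8, 3))  # toy stand-in for a YOLOv5 model
ts = torch.jit.trace(m, torch.zeros(1, 3, 64, 64), strict=False)
meta = {'shape': [1, 3, 64, 64], 'stride': 32, 'names': ['class0']}
ts.save('toy.torchscript.pt', _extra_files={'config.txt': json.dumps(meta)})  # embed metadata

extra_files = {'config.txt': ''}  # keys to extract; values are filled in place
m2 = torch.jit.load('toy.torchscript.pt', _extra_files=extra_files)
d = json.loads(extra_files['config.txt'])
print(d['stride'], d['names'])  # -> 32 ['class0']
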
From ef3f161ef25fc447173e37e8b5905aa5d35505b1 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 8 Nov 2021 00:03:41 +0100
Subject: [PATCH 28/63] Add TorchScript

---
 detect.py        |  6 +++---
 export.py        |  4 ++--
 models/common.py | 14 ++++++++++++--
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/detect.py b/detect.py
index 2da09edc78ae..108f8f138052 100644
--- a/detect.py
+++ b/detect.py
@@ -77,7 +77,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     # Load model
     device = select_device(device)
     model = DetectMultiBackend(weights, device=device, dnn=dnn)
-    stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
+    stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx
     imgsz = check_img_size(imgsz, s=stride)  # check image size
 
     # Half
@@ -89,10 +89,10 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     if webcam:
         view_img = check_imshow()
         cudnn.benchmark = True  # set True to speed up constant image size inference
-        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)
         bs = len(dataset)  # batch_size
     else:
-        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit)
         bs = 1  # batch_size
     vid_path, vid_writer = [None] * bs, [None] * bs
 
diff --git a/export.py b/export.py
index 74fa67c99e32..4cf30e34fc7b 100644
--- a/export.py
+++ b/export.py
@@ -55,8 +55,8 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:'
 
         f = file.with_suffix('.torchscript.pt')
         ts = torch.jit.trace(model, im, strict=False)
-        dict = {"im_shape": im.shape, "stride": int(max(model.stride)), "device": next(model.parameters()).device.type}
-        extra_files = {'config.txt': json.dumps(dict)}  # torch._C.ExtraFilesMap()
+        d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
+        extra_files = {'config.txt': json.dumps(d)}  # torch._C.ExtraFilesMap()
         (optimize_for_mobile(ts) if optimize else ts).save(f, _extra_files=extra_files)
 
         LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
diff --git a/models/common.py b/models/common.py
index ee2fa6f89988..2acb17318996 100644
--- a/models/common.py
+++ b/models/common.py
@@ -3,6 +3,7 @@
 Common modules
 """
 
+import json
 import logging
 import math
 import platform
@@ -283,8 +284,17 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
         pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
+        jit = pt and 'torchscript' in w.lower()
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
-        if pt:  # PyTorch
+
+        if jit:  # TorchScript
+            LOGGER.info(f'Loading {w} for TorchScript inference...')
+            extra_files = {'config.txt': ''}  # model metadata
+            model = torch.jit.load(w, _extra_files=extra_files)
+            if extra_files['config.txt']:
+                d = json.loads(extra_files['config.txt'])  # extra_files dict
+                stride, names = int(d['stride']), d['names']
+        elif pt:  # PyTorch
             from models.experimental import attempt_load  # scoped to avoid circular import
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
@@ -332,7 +342,7 @@ def wrap_frozen_graph(gd, inputs, outputs):
 
     def forward(self, im, augment=False, visualize=False, val=False):
         # YOLOv5 MultiBackend inference
         if self.pt:  # PyTorch
-            y = self.model(im, augment=augment, visualize=visualize)
+            y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
         elif self.onnx:  # ONNX
             im = im.cpu().numpy()  # torch to numpy
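
With TorchScript split out from plain PyTorch checkpoints, backend dispatch is still just a filename test. A minimal reproduction of the selection logic above; the helper name is illustrative:

from pathlib import Path

def backend_flags(w):
    suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
    jit = pt and 'torchscript' in w.lower()
    return pt, jit, onnx, tflite, pb, saved_model

print(backend_flags('yolov5s.torchscript.pt'))  # (True, True, False, False, False, False)
print(backend_flags('yolov5s.onnx'))            # (False, False, True, False, False, False)
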
From 82bfd0f315da0434cb8effd34ad79fa3cb84a378 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 8 Nov 2021 00:13:36 +0100
Subject: [PATCH 29/63] Add CoreML

---
 models/common.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/models/common.py b/models/common.py
index 2acb17318996..9982a8d85c22 100644
--- a/models/common.py
+++ b/models/common.py
@@ -22,7 +22,7 @@
 
 from utils.datasets import exif_transpose, letterbox
 from utils.general import (check_requirements, check_suffix, colorstr, increment_path, make_divisible,
-                           non_max_suppression, scale_coords, xyxy2xywh)
+                           non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
 from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import time_sync
 
@@ -281,9 +281,9 @@ class DetectMultiBackend(nn.Module):
     def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
-        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
+        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '', '.mlmodel']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
-        pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
+        pt, onnx, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
         jit = pt and 'torchscript' in w.lower()
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
 
@@ -299,6 +299,9 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+        elif coreml:  # CoreML *.mlmodel
+            import coremltools as ct
+            model = ct.models.MLModel(w)
         elif dnn:  # ONNX OpenCV DNN
             LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
             check_requirements(('opencv-python>=4.5.4',))
@@ -341,9 +344,15 @@ def wrap_frozen_graph(gd, inputs, outputs):
 
     def forward(self, im, augment=False, visualize=False, val=False):
         # YOLOv5 MultiBackend inference
+        b, ch, h, w = im.shape  # batch, channel, height, width
         if self.pt:  # PyTorch
             y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
+        elif self.coreml:  # CoreML *.mlmodel
+            y = self.model.predict({'image': im})  # coordinates are xywh normalized
+            box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
+            conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
+            y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
         elif self.onnx:  # ONNX
             im = im.cpu().numpy()  # torch to numpy
             if self.dnn:  # ONNX OpenCV DNN
@@ -352,7 +361,7 @@ def forward(self, im, augment=False, visualize=False, val=False):
             else:  # ONNX Runtime
                 y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
-            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,640,640,3)
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,h=640,w=640,3)
             if self.pb:
                 y = self.frozen_func(x=self.tf.constant(im)).numpy()
             elif self.saved_model:
@@ -369,10 +378,10 @@ def forward(self, im, augment=False, visualize=False, val=False):
                 if int8:
                     scale, zero_point = output['quantization']
                     y = (y.astype(np.float32) - zero_point) * scale  # re-scale
-            y[..., 0] *= im.shape[2]  # x
-            y[..., 1] *= im.shape[1]  # y
-            y[..., 2] *= im.shape[2]  # w
-            y[..., 3] *= im.shape[1]  # h
+            y[..., 0] *= w  # x
+            y[..., 1] *= h  # y
+            y[..., 2] *= w  # w
+            y[..., 3] *= h  # h
         y = torch.tensor(y)
         return (y, []) if val else y
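
CoreML returns normalized xywh boxes plus a per-class confidence matrix, which the patch reshapes into the usual (x1, y1, x2, y2, conf, cls) rows. The conversion in isolation, with dummy arrays standing in for MLModel.predict() output:

import numpy as np

def xywh2xyxy(x):  # center-x, center-y, width, height -> corner coordinates
    y = x.copy()
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # x1
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # y1
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # x2
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # y2
    return y

h, w = 320, 192  # inference height and width, as in the patch comments
coords = np.array([[0.5, 0.5, 0.2, 0.4]])  # one detection, xywh normalized
conf = np.array([[0.1, 0.8]])  # confidence for two classes
box = xywh2xyxy(coords * [[w, h, w, h]])  # xyxy pixels
cls = conf.argmax(1).reshape(-1, 1).astype(np.float32)
y = np.concatenate((box, conf.max(1).reshape(-1, 1), cls), 1)
print(y)  # [[ 76.8  96.  115.2 224.    0.8   1. ]]
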
From 109c5d65fecd112623f3b5700ab004f89ada001e Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 8 Nov 2021 00:39:58 +0100
Subject: [PATCH 30/63] CoreML cleanup

---
 detect.py        | 2 +-
 models/common.py | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/detect.py b/detect.py
index 108f8f138052..cb3c4abd6425 100644
--- a/detect.py
+++ b/detect.py
@@ -203,7 +203,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
 
 def parse_opt():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.mlmodel', help='model path(s)')
     parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
     parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
diff --git a/models/common.py b/models/common.py
index 9982a8d85c22..6fa7bdb33622 100644
--- a/models/common.py
+++ b/models/common.py
@@ -349,6 +349,9 @@ def forward(self, im, augment=False, visualize=False, val=False):
             y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
         elif self.coreml:  # CoreML *.mlmodel
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
+            im = Image.fromarray((im[0] * 255).astype('uint8'))
+            # im = im.resize((192, 320), Image.ANTIALIAS)
             y = self.model.predict({'image': im})  # coordinates are xywh normalized
             box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
             conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
@@ -361,7 +364,7 @@ def forward(self, im, augment=False, visualize=False, val=False):
             else:  # ONNX Runtime
                 y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
-            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,h=640,w=640,3)
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
             if self.pb:
                 y = self.frozen_func(x=self.tf.constant(im)).numpy()
             elif self.saved_model:
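
The TFLite output re-scaling touched in patch 29 follows a simple quantization contract: uint8 models take inputs mapped into the quantized domain with the tensor's (scale, zero_point) before set_tensor(), and outputs are mapped back afterwards. A pure-numpy sketch with made-up quantization parameters:

import numpy as np

scale, zero_point = 1 / 255, 0  # example (scale, zero_point), not from a real model
im = np.random.rand(1, 320, 192, 3).astype(np.float32)  # BHWC input in [0, 1]

im_q = (im / scale + zero_point).astype(np.uint8)   # float -> uint8 (de-scale)
y = (im_q.astype(np.float32) - zero_point) * scale  # uint8 -> float (re-scale)
print(float(np.abs(y - im).max()) < scale)  # True: error bounded by one quantization step
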
wrap_frozen_graph(gd, inputs, outputs): - x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped import - return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs), - tf.nest.map_structure(x.graph.as_graph_element, outputs)) - - graph_def = tf.Graph().as_graph_def() - graph_def.ParseFromString(open(w, 'rb').read()) - frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") - elif saved_model: - model = tf.keras.models.load_model(w) - elif tflite: - if "edgetpu" in w: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python - import tflite_runtime.interpreter as tflri - delegate = {'Linux': 'libedgetpu.so.1', # install libedgetpu https://coral.ai/software/#edgetpu-runtime - 'Darwin': 'libedgetpu.1.dylib', - 'Windows': 'edgetpu.dll'}[platform.system()] - interpreter = tflri.Interpreter(model_path=w, experimental_delegates=[tflri.load_delegate(delegate)]) - else: - interpreter = tf.lite.Interpreter(model_path=w) # load TFLite model - interpreter.allocate_tensors() # allocate - input_details = interpreter.get_input_details() # inputs - output_details = interpreter.get_output_details() # outputs - int8 = input_details[0]['dtype'] == np.uint8 # is TFLite quantized uint8 model - imgsz = check_img_size(imgsz, s=stride) # check image size + model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn) + stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx + imgsz = check_img_size(imgsz, s=model.stride) # check image size # Dataloader if webcam: @@ -145,52 +96,24 @@ def wrap_frozen_graph(gd, inputs, outputs): # Run inference if pt and device.type != 'cpu': - model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters()))) # run once + model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters()))) # run once dt, seen = [0.0, 0.0, 0.0], 0 - for path, img, im0s, vid_cap, s in dataset: + for path, im, im0s, vid_cap, s in dataset: t1 = time_sync() if onnx: - img = img.astype('float32') + im = im.astype('float32') else: - img = torch.from_numpy(img).to(device) - img = img.half() if half else img.float() # uint8 to fp16/32 - img /= 255 # 0 - 255 to 0.0 - 1.0 - if len(img.shape) == 3: - img = img[None] # expand for batch dim + im = torch.from_numpy(im).to(device) + im = im.half() if half else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + if len(im.shape) == 3: + im = im[None] # expand for batch dim t2 = time_sync() dt[0] += t2 - t1 # Inference - if pt: - visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False - pred = model(img, augment=augment, visualize=visualize)[0] - elif onnx: - if dnn: - net.setInput(img) - pred = torch.tensor(net.forward()) - else: - pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img})) - else: # tensorflow model (tflite, pb, saved_model) - imn = img.permute(0, 2, 3, 1).cpu().numpy() # image in numpy - if pb: - pred = frozen_func(x=tf.constant(imn)).numpy() - elif saved_model: - pred = model(imn, training=False).numpy() - elif tflite: - if int8: - scale, zero_point = input_details[0]['quantization'] - imn = (imn / scale + zero_point).astype(np.uint8) # de-scale - interpreter.set_tensor(input_details[0]['index'], imn) - interpreter.invoke() - pred = interpreter.get_tensor(output_details[0]['index']) - if int8: - scale, zero_point = output_details[0]['quantization'] - pred = (pred.astype(np.float32) - zero_point) * 
scale # re-scale - pred[..., 0] *= imgsz[1] # x - pred[..., 1] *= imgsz[0] # y - pred[..., 2] *= imgsz[1] # w - pred[..., 3] *= imgsz[0] # h - pred = torch.tensor(pred) + visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False + pred = model(im, augment=augment, visualize=visualize) t3 = time_sync() dt[1] += t3 - t2 @@ -199,8 +122,7 @@ def wrap_frozen_graph(gd, inputs, outputs): dt[2] += time_sync() - t3 # Second-stage classifier (optional) - if classify: - pred = apply_classifier(pred, modelc, img, im0s) + # pred = apply_classifier(pred, classifier_model, im, im0s) # Process predictions for i, det in enumerate(pred): # per image @@ -212,15 +134,15 @@ def wrap_frozen_graph(gd, inputs, outputs): p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) p = Path(p) # to Path - save_path = str(save_dir / p.name) # img.jpg - txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt - s += '%gx%g ' % img.shape[2:] # print string + save_path = str(save_dir / p.name) # im.jpg + txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt + s += '%gx%g ' % im.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): # Rescale boxes from img_size to im0 size - det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() + det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): @@ -282,7 +204,7 @@ def wrap_frozen_graph(gd, inputs, outputs): def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pb', help='model path(s)') parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') diff --git a/models/common.py b/models/common.py index 8035ef11a791..7f1ecfe0ee66 100644 --- a/models/common.py +++ b/models/common.py @@ -5,10 +5,12 @@ import logging import math +import platform import warnings from copy import copy from pathlib import Path +import cv2 import numpy as np import pandas as pd import requests @@ -18,7 +20,8 @@ from torch.cuda import amp from utils.datasets import exif_transpose, letterbox -from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, scale_coords, xyxy2xywh +from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, scale_coords, xyxy2xywh, \ + check_suffix, check_requirements from utils.plots import Annotator, colors, save_one_box from utils.torch_utils import time_sync @@ -272,6 +275,95 @@ def forward(self, x): return torch.cat(x, self.d) +class DetectMultiBackend(nn.Module): + def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False): + super().__init__() + # Load model + w = str(weights[0] if isinstance(weights, list) else weights) + suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', ''] + check_suffix(w, suffixes) # check weights have acceptable suffix + pt, 
onnx, tflite, pb, saved_model = (suffix == x for x in suffixes) # backend booleans + stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults + if pt: + from models.experimental import attempt_load + model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device) + stride = int(model.stride.max()) # model stride + names = model.module.names if hasattr(model, 'module') else model.names # get class names + if half: + model.half() # to FP16 + elif onnx: + if dnn: # OpenCV DNN + check_requirements(('opencv-python>=4.5.4',)) + net = cv2.dnn.readNetFromONNX(w) + else: # ONNX Runtime + check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime')) + import onnxruntime + session = onnxruntime.InferenceSession(w, None) + else: # TensorFlow model (TFLite, pb, saved_model) + import tensorflow as tf + if pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt + def wrap_frozen_graph(gd, inputs, outputs): + x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped + return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs), + tf.nest.map_structure(x.graph.as_graph_element, outputs)) + + graph_def = tf.Graph().as_graph_def() + graph_def.ParseFromString(open(w, 'rb').read()) + frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") + elif saved_model: + model = tf.keras.models.load_model(w) + elif tflite: + if "edgetpu" in w: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python + import tflite_runtime.interpreter as tflri + delegate = {'Linux': 'libedgetpu.so.1', # install https://coral.ai/software/#edgetpu-runtime + 'Darwin': 'libedgetpu.1.dylib', + 'Windows': 'edgetpu.dll'}[platform.system()] + interpreter = tflri.Interpreter(model_path=w, + experimental_delegates=[tflri.load_delegate(delegate)]) + else: + interpreter = tf.lite.Interpreter(model_path=w) # load TFLite model + interpreter.allocate_tensors() # allocate + input_details = interpreter.get_input_details() # inputs + output_details = interpreter.get_output_details() # outputs + + self.__dict__.update(locals()) # all all variables to self + + def forward(self, im, augment=False, profile=False, visualize=False): + # Inference + if self.pt: + y = self.model(im, augment=augment, visualize=visualize)[0] + elif self.onnx: + if self.dnn: # OpenCV DNN + self.net.setInput(im) + y = self.net.forward() + else: # ONNX Runtime + y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0] + else: # TensorFlow model (TFLite, pb, saved_model) + import tensorflow as tf + im = im.permute(0, 2, 3, 1).cpu().numpy() # TF format (1,640,640,3) + if self.pb: + y = self.frozen_func(x=tf.constant(im)).numpy() + elif self.saved_model: + y = self.model(im, training=False).numpy() + elif self.tflite: + input, output = self.input_details[0], self.output_details[0] + int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model + if int8: + scale, zero_point = input['quantization'] + im = (im / scale + zero_point).astype(np.uint8) # de-scale + self.interpreter.set_tensor(input['index'], im) + self.interpreter.invoke() + y = self.interpreter.get_tensor(output['index']) + if int8: + scale, zero_point = output['quantization'] + y = (y.astype(np.float32) - zero_point) * scale # re-scale + y[..., 0] *= im.shape[2] # x + y[..., 1] *= im.shape[1] # y + y[..., 2] *= im.shape[2] # w + y[..., 3] *= im.shape[1] # h + return y if self.pt else 
torch.tensor(y) + + class AutoShape(nn.Module): # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS conf = 0.25 # NMS confidence threshold diff --git a/utils/general.py b/utils/general.py index 0f45d72498fe..86c7d90c8220 100755 --- a/utils/general.py +++ b/utils/general.py @@ -785,7 +785,8 @@ def print_mutation(results, hyp, save_dir, bucket): def apply_classifier(x, model, img, im0): - # Apply a second stage classifier to yolo outputs + # Apply a second stage classifier to YOLO outputs + # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval() im0 = [im0] if isinstance(im0, np.ndarray) else im0 for i, d in enumerate(x): # per image if d is not None and len(d): diff --git a/utils/torch_utils.py b/utils/torch_utils.py index b36e98d0b656..d0f143b1a30b 100644 --- a/utils/torch_utils.py +++ b/utils/torch_utils.py @@ -18,7 +18,6 @@ import torch.distributed as dist import torch.nn as nn import torch.nn.functional as F -import torchvision from utils.general import LOGGER @@ -237,25 +236,6 @@ def model_info(model, verbose=False, img_size=640): LOGGER.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") -def load_classifier(name='resnet101', n=2): - # Loads a pretrained model reshaped to n-class output - model = torchvision.models.__dict__[name](pretrained=True) - - # ResNet model properties - # input_size = [3, 224, 224] - # input_space = 'RGB' - # input_range = [0, 1] - # mean = [0.485, 0.456, 0.406] - # std = [0.229, 0.224, 0.225] - - # Reshape output to n classes - filters = model.fc.weight.shape[1] - model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) - model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) - model.fc.out_features = n - return model - - def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) # scales img(bs,3,y,x) by ratio constrained to gs-multiple if ratio == 1.0: From a9a0fedf2a1f923c37ded3b455d0567d40d7e4a8 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:20:08 +0100 Subject: [PATCH 32/63] pb to pt fix --- detect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detect.py b/detect.py index 7080f83497fe..f5ec42e612e2 100644 --- a/detect.py +++ b/detect.py @@ -204,7 +204,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pb', help='model path(s)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') From 11bd91c75191bbc66eabcf80ed63ee88ce14d593 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 7 Nov 2021 13:17:56 +0000 Subject: [PATCH 33/63] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- models/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/common.py b/models/common.py index 7f1ecfe0ee66..b0606ddca897 100644 --- a/models/common.py +++ b/models/common.py @@ -20,8 
+20,8 @@ from torch.cuda import amp from utils.datasets import exif_transpose, letterbox -from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, scale_coords, xyxy2xywh, \ - check_suffix, check_requirements +from utils.general import (check_requirements, check_suffix, colorstr, increment_path, make_divisible, + non_max_suppression, scale_coords, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box from utils.torch_utils import time_sync From 07b4289fa4224fa85a56fa6a7c3b7a5f8e6d5bf1 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:34:27 +0100 Subject: [PATCH 34/63] Cleanup --- models/common.py | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/models/common.py b/models/common.py index b0606ddca897..5f1b05e88fe9 100644 --- a/models/common.py +++ b/models/common.py @@ -277,28 +277,27 @@ def forward(self, x): class DetectMultiBackend(nn.Module): def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False): + # MultiBackend model load super().__init__() - # Load model w = str(weights[0] if isinstance(weights, list) else weights) suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', ''] check_suffix(w, suffixes) # check weights have acceptable suffix pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes) # backend booleans stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults if pt: - from models.experimental import attempt_load + from models.experimental import attempt_load # scoped to avoid circular import model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device) stride = int(model.stride.max()) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names if half: model.half() # to FP16 - elif onnx: - if dnn: # OpenCV DNN - check_requirements(('opencv-python>=4.5.4',)) - net = cv2.dnn.readNetFromONNX(w) - else: # ONNX Runtime - check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime')) - import onnxruntime - session = onnxruntime.InferenceSession(w, None) + elif dnn: # ONNX OpenCV DNN + check_requirements(('opencv-python>=4.5.4',)) + net = cv2.dnn.readNetFromONNX(w) + elif onnx: # ONNX Runtime + check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime')) + import onnxruntime + session = onnxruntime.InferenceSession(w, None) else: # TensorFlow model (TFLite, pb, saved_model) import tensorflow as tf if pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt @@ -325,24 +324,21 @@ def wrap_frozen_graph(gd, inputs, outputs): interpreter.allocate_tensors() # allocate input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs + self.__dict__.update(locals()) # assign all variables to self - self.__dict__.update(locals()) # all all variables to self - - def forward(self, im, augment=False, profile=False, visualize=False): - # Inference + def forward(self, im, augment=False, visualize=False): + # MultiBackend inference if self.pt: - y = self.model(im, augment=augment, visualize=visualize)[0] - elif self.onnx: - if self.dnn: # OpenCV DNN - self.net.setInput(im) - y = self.net.forward() - else: # ONNX Runtime - y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0] + return self.model(im, augment=augment, visualize=visualize)[0] + elif self.dnn: # ONNX 
OpenCV DNN + self.net.setInput(im) + y = self.net.forward() + elif self.onnx: # ONNX Runtime + y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0] else: # TensorFlow model (TFLite, pb, saved_model) - import tensorflow as tf im = im.permute(0, 2, 3, 1).cpu().numpy() # TF format (1,640,640,3) if self.pb: - y = self.frozen_func(x=tf.constant(im)).numpy() + y = self.frozen_func(x=self.tf.constant(im)).numpy() elif self.saved_model: y = self.model(im, training=False).numpy() elif self.tflite: @@ -361,7 +357,7 @@ def forward(self, im, augment=False, profile=False, visualize=False): y[..., 1] *= im.shape[1] # y y[..., 2] *= im.shape[2] # w y[..., 3] *= im.shape[1] # h - return y if self.pt else torch.tensor(y) + return torch.tensor(y) class AutoShape(nn.Module): From b6f6c0d9c29976de7dc506bd714666462520c3c9 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:36:57 +0100 Subject: [PATCH 35/63] explicit apply_classifier path --- detect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detect.py b/detect.py index f5ec42e612e2..b93bc521c332 100644 --- a/detect.py +++ b/detect.py @@ -122,7 +122,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) dt[2] += time_sync() - t3 # Second-stage classifier (optional) - # pred = apply_classifier(pred, classifier_model, im, im0s) + # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) # Process predictions for i, det in enumerate(pred): # per image From 4d85ec297e31f0eb1ecc96365c6011520be3b1a4 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:40:19 +0100 Subject: [PATCH 36/63] Cleanup2 --- models/common.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/models/common.py b/models/common.py index 5f1b05e88fe9..2ea7088530e9 100644 --- a/models/common.py +++ b/models/common.py @@ -312,13 +312,12 @@ def wrap_frozen_graph(gd, inputs, outputs): elif saved_model: model = tf.keras.models.load_model(w) elif tflite: - if "edgetpu" in w: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python - import tflite_runtime.interpreter as tflri + if 'edgetpu' in w: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python + import tflite_runtime.interpreter as tfli delegate = {'Linux': 'libedgetpu.so.1', # install https://coral.ai/software/#edgetpu-runtime 'Darwin': 'libedgetpu.1.dylib', 'Windows': 'edgetpu.dll'}[platform.system()] - interpreter = tflri.Interpreter(model_path=w, - experimental_delegates=[tflri.load_delegate(delegate)]) + interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)]) else: interpreter = tf.lite.Interpreter(model_path=w) # load TFLite model interpreter.allocate_tensors() # allocate From df737a01dd0ee7cb5d97904b5f2be439322f326f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:41:32 +0100 Subject: [PATCH 37/63] Cleanup3 --- models/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/common.py b/models/common.py index 2ea7088530e9..7359043cb805 100644 --- a/models/common.py +++ b/models/common.py @@ -311,8 +311,8 @@ def wrap_frozen_graph(gd, inputs, outputs): frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") elif saved_model: model = tf.keras.models.load_model(w) - elif tflite: - if 'edgetpu' in w: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python + elif tflite: # 
https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python + if 'edgetpu' in w.lower(): import tflite_runtime.interpreter as tfli delegate = {'Linux': 'libedgetpu.so.1', # install https://coral.ai/software/#edgetpu-runtime 'Darwin': 'libedgetpu.1.dylib', From 70e9dfb3621f43547c4551e3689fc4bc20ba4b9f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:45:06 +0100 Subject: [PATCH 38/63] Cleanup4 --- models/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/common.py b/models/common.py index 7359043cb805..b309705da9b0 100644 --- a/models/common.py +++ b/models/common.py @@ -276,8 +276,8 @@ def forward(self, x): class DetectMultiBackend(nn.Module): + # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False): - # MultiBackend model load super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', ''] @@ -326,7 +326,7 @@ def wrap_frozen_graph(gd, inputs, outputs): self.__dict__.update(locals()) # assign all variables to self def forward(self, im, augment=False, visualize=False): - # MultiBackend inference + # YOLOv5 MultiBackend inference if self.pt: return self.model(im, augment=augment, visualize=visualize)[0] elif self.dnn: # ONNX OpenCV DNN From 9729521164435d041a899c50bd12857e8358c98d Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:47:41 +0100 Subject: [PATCH 39/63] Cleanup5 --- detect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detect.py b/detect.py index b93bc521c332..a216ba2a59ca 100644 --- a/detect.py +++ b/detect.py @@ -81,7 +81,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) # Load model model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn) stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx - imgsz = check_img_size(imgsz, s=model.stride) # check image size + imgsz = check_img_size(imgsz, s=stride) # check image size # Dataloader if webcam: From ab7358f6259e77b8e2a0ec55fd552b42158b3d63 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 15:33:48 +0100 Subject: [PATCH 40/63] Cleanup6 --- models/common.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/models/common.py b/models/common.py index b309705da9b0..a050521aa8e0 100644 --- a/models/common.py +++ b/models/common.py @@ -277,14 +277,14 @@ def forward(self, x): class DetectMultiBackend(nn.Module): # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN - def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False): + def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=True): super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', ''] check_suffix(w, suffixes) # check weights have acceptable suffix pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes) # backend booleans stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults - if pt: + if pt: # PyTorch from models.experimental import attempt_load # scoped to avoid circular import model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device) stride = int(model.stride.max()) # model stride @@ -292,9 +292,11 @@ def 
__init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False): if half: model.half() # to FP16 elif dnn: # ONNX OpenCV DNN + LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') check_requirements(('opencv-python>=4.5.4',)) net = cv2.dnn.readNetFromONNX(w) elif onnx: # ONNX Runtime + LOGGER.info(f'Loading {w} for ONNX Runtime inference...') check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime')) import onnxruntime session = onnxruntime.InferenceSession(w, None) @@ -306,34 +308,41 @@ def wrap_frozen_graph(gd, inputs, outputs): return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs), tf.nest.map_structure(x.graph.as_graph_element, outputs)) + LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...') graph_def = tf.Graph().as_graph_def() graph_def.ParseFromString(open(w, 'rb').read()) frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") elif saved_model: + LOGGER.info(f'Loading {w} for TensorFlow saved_model inference...') model = tf.keras.models.load_model(w) elif tflite: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python if 'edgetpu' in w.lower(): + LOGGER.info(f'Loading {w} for TensorFlow Edge TPU inference...') import tflite_runtime.interpreter as tfli delegate = {'Linux': 'libedgetpu.so.1', # install https://coral.ai/software/#edgetpu-runtime 'Darwin': 'libedgetpu.1.dylib', 'Windows': 'edgetpu.dll'}[platform.system()] interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)]) else: + LOGGER.info(f'Loading {w} for TensorFlow Lite inference...') interpreter = tf.lite.Interpreter(model_path=w) # load TFLite model interpreter.allocate_tensors() # allocate input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs self.__dict__.update(locals()) # assign all variables to self - def forward(self, im, augment=False, visualize=False): + def forward(self, im, augment=False, visualize=False, val=False): # YOLOv5 MultiBackend inference - if self.pt: - return self.model(im, augment=augment, visualize=visualize)[0] - elif self.dnn: # ONNX OpenCV DNN - self.net.setInput(im) - y = self.net.forward() - elif self.onnx: # ONNX Runtime - y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0] + if self.pt: # PyTorch + y = self.model(im, augment=augment, visualize=visualize) + return y if val else y[0] + elif self.onnx: # ONNX + im = np.array(im) + if self.dnn: # ONNX OpenCV DNN + self.net.setInput(im) + y = self.net.forward() + else: # ONNX Runtime + y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0] else: # TensorFlow model (TFLite, pb, saved_model) im = im.permute(0, 2, 3, 1).cpu().numpy() # TF format (1,640,640,3) if self.pb: @@ -356,7 +365,8 @@ def forward(self, im, augment=False, visualize=False): y[..., 1] *= im.shape[1] # y y[..., 2] *= im.shape[2] # w y[..., 3] *= im.shape[1] # h - return torch.tensor(y) + y = torch.tensor(y) + return (y, []) if val else y class AutoShape(nn.Module): From c1bf0e2ddb01fa295253861ed7a2bb06a7f460c5 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 15:45:55 +0100 Subject: [PATCH 41/63] val.py MultiBackend inference --- val.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/val.py b/val.py index d2797f1189ec..832a662ad3fe 100644 --- a/val.py +++ b/val.py @@ -23,10 +23,10 @@ 
sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -from models.experimental import attempt_load +from models.common import DetectMultiBackend from utils.callbacks import Callbacks from utils.datasets import create_dataloader -from utils.general import (LOGGER, box_iou, check_dataset, check_img_size, check_requirements, check_suffix, check_yaml, +from utils.general import (LOGGER, box_iou, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, ap_per_class @@ -100,6 +100,7 @@ def run(data, name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference model=None, dataloader=None, save_dir=Path(''), @@ -120,10 +121,12 @@ def run(data, (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model - check_suffix(weights, '.pt') - model = attempt_load(weights, map_location=device) # load FP32 model - gs = max(int(model.stride.max()), 32) # grid size (max stride) - imgsz = check_img_size(imgsz, s=gs) # check image size + model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn) + stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx + imgsz = check_img_size(imgsz, s=model.stride) # check image size + if not model.pt: + LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends') + batch_size = 1 # export.py models default to batch-size 1 # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: @@ -146,10 +149,10 @@ def run(data, # Dataloader if not training: if device.type != 'cpu': - model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once + model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters()))) # run once pad = 0.0 if task == 'speed' else 0.5 task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images - dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=pad, rect=True, + dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=model.pt, prefix=colorstr(f'{task}: '))[0] seen = 0 @@ -171,7 +174,7 @@ def run(data, dt[0] += t2 - t1 # Run model - out, train_out = model(img, augment=augment) # inference and training outputs + out, train_out = model(img, augment=augment, val=True) # inference and training outputs dt[1] += time_sync() - t2 # Compute loss @@ -318,6 +321,7 @@ def parse_opt(): parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') opt = parser.parse_args() opt.data = check_yaml(opt.data) # check YAML opt.save_json |= opt.data.endswith('coco.yaml') From b5bae243af1303020fa64fbb7c53823e9d7c95ba Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 15:53:30 +0100 Subject: [PATCH 42/63] warmup fix --- detect.py | 2 +- val.py | 4 ++-- 2 
files changed, 3 insertions(+), 3 deletions(-) diff --git a/detect.py b/detect.py index a216ba2a59ca..a110dd5c63ad 100644 --- a/detect.py +++ b/detect.py @@ -96,7 +96,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) # Run inference if pt and device.type != 'cpu': - model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters()))) # run once + model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters()))) # warmup dt, seen = [0.0, 0.0, 0.0], 0 for path, im, im0s, vid_cap, s in dataset: t1 = time_sync() diff --git a/val.py b/val.py index 832a662ad3fe..f44e4ed573ff 100644 --- a/val.py +++ b/val.py @@ -148,8 +148,8 @@ def run(data, # Dataloader if not training: - if device.type != 'cpu': - model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters()))) # run once + if model.pt and device.type != 'cpu': + model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters()))) # warmup pad = 0.0 if task == 'speed' else 0.5 task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=model.pt, From 96f2b3c90fb952c7e80061bad4245b89dfa2d66e Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 15:57:51 +0100 Subject: [PATCH 43/63] to device fix --- val.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/val.py b/val.py index f44e4ed573ff..01097fd67d21 100644 --- a/val.py +++ b/val.py @@ -163,18 +163,21 @@ def run(data, dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class = [], [], [], [] - for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): + for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): t1 = time_sync() - img = img.to(device, non_blocking=True) - img = img.half() if half else img.float() # uint8 to fp16/32 - img /= 255 # 0 - 255 to 0.0 - 1.0 + if model.onnx: + im = np.array(im).astype('float32') + else: + im = im.to(device, non_blocking=True) + im = im.half() if half else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) - nb, _, height, width = img.shape # batch size, channels, height, width + nb, _, height, width = im.shape # batch size, channels, height, width t2 = time_sync() dt[0] += t2 - t1 # Run model - out, train_out = model(img, augment=augment, val=True) # inference and training outputs + out, train_out = model(im, augment=augment, val=True) # inference and training outputs dt[1] += time_sync() - t2 # Compute loss @@ -205,12 +208,12 @@ def run(data, if single_cls: pred[:, 5] = 0 predn = pred.clone() - scale_coords(img[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred + scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred # Evaluate if nl: tbox = xywh2xyxy(labels[:, 1:5]) # target boxes - scale_coords(img[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels + scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels correct = process_batch(predn, labelsn, iouv) if plots: @@ -224,14 +227,14 @@ def run(data, save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) if save_json: save_one_json(predn, jdict, path, class_map) # 
append to COCO-JSON dictionary - callbacks.run('on_val_image_end', pred, predn, path, names, img[si]) + callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) # Plot images if plots and batch_i < 3: f = save_dir / f'val_batch{batch_i}_labels.jpg' # labels - Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start() + Thread(target=plot_images, args=(im, targets, paths, f, names), daemon=True).start() f = save_dir / f'val_batch{batch_i}_pred.jpg' # predictions - Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start() + Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start() # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy From 32974f23f54437fc8f99b1415c9ccc77c4dd2e5d Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:00:01 +0100 Subject: [PATCH 44/63] pt fix --- val.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/val.py b/val.py index 01097fd67d21..13f9866273f8 100644 --- a/val.py +++ b/val.py @@ -111,7 +111,7 @@ def run(data, # Initialize/load model and set device training = model is not None if training: # called by train.py - device = next(model.parameters()).device # get model device + device, pt = next(model.parameters()).device, True # get model device, PyTorch model else: # called directly device = select_device(device, batch_size=batch_size) @@ -165,13 +165,11 @@ def run(data, jdict, stats, ap, ap_class = [], [], [], [] for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): t1 = time_sync() - if model.onnx: - im = np.array(im).astype('float32') - else: + if pt: im = im.to(device, non_blocking=True) im = im.half() if half else im.float() # uint8 to fp16/32 + targets = targets.to(device) im /= 255 # 0 - 255 to 0.0 - 1.0 - targets = targets.to(device) nb, _, height, width = im.shape # batch size, channels, height, width t2 = time_sync() dt[0] += t2 - t1 From d955ed658b2787308fa1818711ce6ba81bf893c8 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:11:15 +0100 Subject: [PATCH 45/63] device fix --- val.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/val.py b/val.py index 13f9866273f8..32712fbe558e 100644 --- a/val.py +++ b/val.py @@ -127,6 +127,7 @@ def run(data, if not model.pt: LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends') batch_size = 1 # export.py models default to batch-size 1 + device = torch.device('cpu') # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: @@ -169,6 +170,8 @@ def run(data, im = im.to(device, non_blocking=True) im = im.half() if half else im.float() # uint8 to fp16/32 targets = targets.to(device) + else: + im = im.numpy().astype('float32') im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width t2 = time_sync() From 9c253596180bbe4aea9691f05151efc0306cea4d Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:18:15 +0100 Subject: [PATCH 46/63] Val cleanup --- val.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/val.py b/val.py index 32712fbe558e..993c0be0052a 100644 --- a/val.py +++ b/val.py @@ -137,7 +137,7 @@ def run(data, data = check_dataset(data) # check # Half - half &= device.type != 'cpu' # half precision only supported on CUDA + half &= pt and 
device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() # Configure @@ -149,7 +149,7 @@ def run(data, # Dataloader if not training: - if model.pt and device.type != 'cpu': + if pt and device.type != 'cpu': model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters()))) # warmup pad = 0.0 if task == 'speed' else 0.5 task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images @@ -166,12 +166,12 @@ def run(data, jdict, stats, ap, ap_class = [], [], [], [] for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): t1 = time_sync() + im = im.half() if half else im.float() # uint8 to fp16/32 if pt: im = im.to(device, non_blocking=True) - im = im.half() if half else im.float() # uint8 to fp16/32 targets = targets.to(device) - else: - im = im.numpy().astype('float32') + # else: + # im = im.numpy().astype('float32') im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width t2 = time_sync() From e9cd5eb2323b133bc6db3ff4034fa7751d37c84f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:21:47 +0100 Subject: [PATCH 47/63] COCO128 URL to assets --- data/coco128.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/coco128.yaml b/data/coco128.yaml index b1dfb004afa1..84a91b18359d 100644 --- a/data/coco128.yaml +++ b/data/coco128.yaml @@ -27,4 +27,4 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't # Download script/URL (optional) -download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip +download: https://ultralytics.com/assets/coco128.zip From 54d3dfa38183b3642cd18077dad6e9104dd897f3 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:41:24 +0100 Subject: [PATCH 48/63] half fix --- detect.py | 8 +++----- models/common.py | 4 ++-- val.py | 6 ++---- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/detect.py b/detect.py index a110dd5c63ad..34b2095a789b 100644 --- a/detect.py +++ b/detect.py @@ -79,7 +79,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) half &= device.type != 'cpu' # half precision only supported on CUDA # Load model - model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn) + model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn) stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx imgsz = check_img_size(imgsz, s=stride) # check image size @@ -100,11 +100,9 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) dt, seen = [0.0, 0.0, 0.0], 0 for path, im, im0s, vid_cap, s in dataset: t1 = time_sync() - if onnx: - im = im.astype('float32') - else: + if pt: im = torch.from_numpy(im).to(device) - im = im.half() if half else im.float() # uint8 to fp16/32 + im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 if len(im.shape) == 3: im = im[None] # expand for batch dim diff --git a/models/common.py b/models/common.py index a050521aa8e0..9fa83a091434 100644 --- a/models/common.py +++ b/models/common.py @@ -277,7 +277,7 @@ def forward(self, x): class DetectMultiBackend(nn.Module): # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN - def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=True): + def __init__(self, weights='yolov5s.pt', device=None, pt_half=False, dnn=True): super().__init__() w = str(weights[0] if isinstance(weights, list) 
else weights) suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', ''] @@ -289,7 +289,7 @@ def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=True): model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device) stride = int(model.stride.max()) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names - if half: + if pt_half: model.half() # to FP16 elif dnn: # ONNX OpenCV DNN LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') diff --git a/val.py b/val.py index 993c0be0052a..c5b8e1cd7544 100644 --- a/val.py +++ b/val.py @@ -121,7 +121,7 @@ def run(data, (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model - model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn) + model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn) stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx imgsz = check_img_size(imgsz, s=model.stride) # check image size if not model.pt: @@ -166,12 +166,10 @@ def run(data, jdict, stats, ap, ap_class = [], [], [], [] for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): t1 = time_sync() - im = im.half() if half else im.float() # uint8 to fp16/32 if pt: im = im.to(device, non_blocking=True) targets = targets.to(device) - # else: - # im = im.numpy().astype('float32') + im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width t2 = time_sync() From 0b07c0c4b539ee6cf10616e894434b69b096d7ff Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:47:58 +0100 Subject: [PATCH 49/63] detect fix --- detect.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/detect.py b/detect.py index 34b2095a789b..19102c77ce8d 100644 --- a/detect.py +++ b/detect.py @@ -102,7 +102,9 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) t1 = time_sync() if pt: im = torch.from_numpy(im).to(device) - im = im.half() if half else im.float() # uint8 to fp16/32 + im = im.half() if half else im.float() # uint8 to fp16/32 + else: + im = im.astype('float32') im /= 255 # 0 - 255 to 0.0 - 1.0 if len(im.shape) == 3: im = im[None] # expand for batch dim From 55eefc0850845503e5cacab6a28be46b7e4cbe02 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:55:38 +0100 Subject: [PATCH 50/63] detect fix 2 --- detect.py | 7 ++----- models/common.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/detect.py b/detect.py index 19102c77ce8d..8cb8e469b20a 100644 --- a/detect.py +++ b/detect.py @@ -100,11 +100,8 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) dt, seen = [0.0, 0.0, 0.0], 0 for path, im, im0s, vid_cap, s in dataset: t1 = time_sync() - if pt: - im = torch.from_numpy(im).to(device) - im = im.half() if half else im.float() # uint8 to fp16/32 - else: - im = im.astype('float32') + im = torch.from_numpy(im).to(device) + im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 if len(im.shape) == 3: im = im[None] # expand for batch dim diff --git a/models/common.py b/models/common.py index 9fa83a091434..7032dc3be296 100644 --- a/models/common.py +++ b/models/common.py @@ -337,7 +337,7 @@ def forward(self, im, augment=False, visualize=False, val=False): y = self.model(im, augment=augment, visualize=visualize) return y if val else y[0] elif 
From 17676ae64c25152885e5c00a98877366b9901473 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:05:19 +0100
Subject: [PATCH 51/63] remove half from DetectMultiBackend

---
 detect.py        | 11 ++++++-----
 models/common.py |  4 +---
 val.py           |  4 ++--
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/detect.py b/detect.py
index 8cb8e469b20a..865104c32504 100644
--- a/detect.py
+++ b/detect.py
@@ -74,15 +74,16 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

-    # Initialize
-    device = select_device(device)
-    half &= device.type != 'cpu'  # half precision only supported on CUDA
-
     # Load model
-    model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn)
+    device = select_device(device)
+    model = DetectMultiBackend(weights, device=device, dnn=dnn)
     stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
     imgsz = check_img_size(imgsz, s=stride)  # check image size

+    # Half
+    half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+    model.half() if half else model.float()
+
     # Dataloader
     if webcam:
         view_img = check_imshow()

diff --git a/models/common.py b/models/common.py
index 7032dc3be296..ee2fa6f89988 100644
--- a/models/common.py
+++ b/models/common.py
@@ -277,7 +277,7 @@ def forward(self, x):

 class DetectMultiBackend(nn.Module):
     # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN
-    def __init__(self, weights='yolov5s.pt', device=None, pt_half=False, dnn=True):
+    def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
@@ -289,8 +289,6 @@ def __init__(self, weights='yolov5s.pt', device=None, pt_half=False, dnn=True):
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-            if pt_half:
-                model.half()  # to FP16
         elif dnn:  # ONNX OpenCV DNN
             LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
             check_requirements(('opencv-python>=4.5.4',))

diff --git a/val.py b/val.py
index c5b8e1cd7544..e62918596b11 100644
--- a/val.py
+++ b/val.py
@@ -121,10 +121,10 @@ def run(data,
         (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

         # Load model
-        model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn)
+        model = DetectMultiBackend(weights, device=device, dnn=dnn)
         stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
         imgsz = check_img_size(imgsz, s=model.stride)  # check image size
-        if not model.pt:
+        if not pt:
             LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
             batch_size = 1  # export.py models default to batch-size 1
             device = torch.device('cpu')

From 3985a5928d4b26b5e2f16d19a3a6db31d6c56c60 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:12:11 +0100
Subject: [PATCH 52/63] training half handling

---
 val.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/val.py b/val.py
index e62918596b11..e53f59635cbb 100644
--- a/val.py
+++ b/val.py
@@ -113,6 +113,8 @@ def run(data,
     if training:  # called by train.py
         device, pt = next(model.parameters()).device, True  # get model device, PyTorch model

+        half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+        model.half() if half else model.float()
     else:  # called directly
         device = select_device(device, batch_size=batch_size)

@@ -129,17 +131,12 @@ def run(data,
             batch_size = 1  # export.py models default to batch-size 1
             device = torch.device('cpu')

-        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
-        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
-        #     model = nn.DataParallel(model)
+        half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+        model.model.half() if half else model.model.float()

     # Data
     data = check_dataset(data)  # check

-    # Half
-    half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
-    model.half() if half else model.float()
-
     # Configure
     model.eval()
     is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt')  # COCO dataset
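A note on patch 52: val.py now converts precision at both entry points, and the object differs between them, so the attribute does too. Condensed from the diff above:

    if training:  # model is the bare nn.Module handed over by train.py
        model.half() if half else model.float()
    else:         # model is a DetectMultiBackend wrapper; its network lives in .model
        model.model.half() if half else model.model.float()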
From 9844c8177b7c75d8c3fd26714449fb54b0be06a2 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:15:20 +0100
Subject: [PATCH 53/63] training half handling 2

---
 val.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/val.py b/val.py
index e53f59635cbb..83a7e62c18e6 100644
--- a/val.py
+++ b/val.py
@@ -113,7 +113,7 @@ def run(data,
     if training:  # called by train.py
         device, pt = next(model.parameters()).device, True  # get model device, PyTorch model

-        half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+        half &= device.type != 'cpu'  # half precision only supported on CUDA
         model.half() if half else model.float()
     else:  # called directly
         device = select_device(device, batch_size=batch_size)
@@ -126,14 +126,14 @@ def run(data,
         model = DetectMultiBackend(weights, device=device, dnn=dnn)
         stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
         imgsz = check_img_size(imgsz, s=model.stride)  # check image size
-        if not pt:
+        if pt:
+            half &= device.type != 'cpu'  # half precision only supported on CUDA
+            model.model.half() if half else model.model.float()
+        else:
             LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
             batch_size = 1  # export.py models default to batch-size 1
             device = torch.device('cpu')

-        half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
-        model.model.half() if half else model.model.float()
-
     # Data
     data = check_dataset(data)  # check

From fe94f4bb7a781f1424487527b78e8d55bcc7b087 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:23:34 +0100
Subject: [PATCH 54/63] training half handling 3

---
 detect.py | 3 ++-
 val.py    | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/detect.py b/detect.py
index 865104c32504..bee873405b2d 100644
--- a/detect.py
+++ b/detect.py
@@ -82,7 +82,8 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)

     # Half
     half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
-    model.half() if half else model.float()
+    if pt:
+        model.model.half() if half else model.model.float()

diff --git a/val.py b/val.py
index 83a7e62c18e6..a24df1bae062 100644
--- a/val.py
+++ b/val.py
@@ -126,13 +126,14 @@ def run(data,
         model = DetectMultiBackend(weights, device=device, dnn=dnn)
         stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
         imgsz = check_img_size(imgsz, s=model.stride)  # check image size
+        half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
         if pt:
-            half &= device.type != 'cpu'  # half precision only supported on CUDA
             model.model.half() if half else model.model.float()
         else:
-            LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
+            half = False
             batch_size = 1  # export.py models default to batch-size 1
             device = torch.device('cpu')
+            LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
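A note on patches 53-54: the standalone branch now decides everything from pt up front. The resulting val.py logic, reconstructed from the diffs above for orientation only:

    half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
    if pt:
        model.model.half() if half else model.model.float()
    else:  # non-PyTorch backends: force FP32, batch size 1, CPU
        half = False
        batch_size = 1
        device = torch.device('cpu')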
From 28de24694adbd567869429d0d9bcc9dcc9f8fd46 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:29:51 +0100
Subject: [PATCH 55/63] Cleanup

---
 detect.py | 2 +-
 val.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/detect.py b/detect.py
index bee873405b2d..2da09edc78ae 100644
--- a/detect.py
+++ b/detect.py
@@ -81,7 +81,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     imgsz = check_img_size(imgsz, s=stride)  # check image size

     # Half
-    half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
     if pt:
         model.model.half() if half else model.model.float()

diff --git a/val.py b/val.py
index a24df1bae062..08686cf0b4d9 100644
--- a/val.py
+++ b/val.py
@@ -126,7 +126,7 @@ def run(data,
         model = DetectMultiBackend(weights, device=device, dnn=dnn)
         stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
         imgsz = check_img_size(imgsz, s=model.stride)  # check image size
-        half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+        half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
         if pt:
             model.model.half() if half else model.model.float()
         else:

From 709d9ce7a3cfbab8024e8574591d388897be8409 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:35:33 +0100
Subject: [PATCH 56/63] Fix CI error

---
 val.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/val.py b/val.py
index 08686cf0b4d9..baefa79bbfbe 100644
--- a/val.py
+++ b/val.py
@@ -173,22 +173,22 @@ def run(data,
         t2 = time_sync()
         dt[0] += t2 - t1

-        # Run model
-        out, train_out = model(im, augment=augment, val=True)  # inference and training outputs
+        # Inference
+        out, train_out = model(im) if training else model(im, augment=augment, val=True)  # inference, loss outputs
         dt[1] += time_sync() - t2

-        # Compute loss
+        # Loss
         if compute_loss:
             loss += compute_loss([x.float() for x in train_out], targets)[1]  # box, obj, cls

-        # Run NMS
+        # NMS
         targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
         lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
         t3 = time_sync()
         out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
         dt[2] += time_sync() - t3

-        # Statistics per image
+        # Metrics
         for si, pred in enumerate(out):
             labels = targets[targets[:, 0] == si, 1:]
             nl = len(labels)
@@ -233,7 +233,7 @@ def run(data,
             f = save_dir / f'val_batch{batch_i}_pred.jpg'  # predictions
             Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start()

-    # Compute statistics
+    # Compute metrics
     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
     if len(stats) and stats[0].any():
         p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
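A note on patch 56: the CI fix hinges on the two callables having different forward signatures. In eval mode a bare DetectionModel already returns (inference, loss) outputs directly, while DetectMultiBackend only returns that tuple when asked via val=True, hence the dispatch:

    out, train_out = model(im) if training else model(im, augment=augment, val=True)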
From 19bdb6e5c5c9da999ab0dd6dd81943b3951190ea Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 23:43:01 +0100
Subject: [PATCH 57/63] Add torchscript _extra_files

---
 export.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/export.py b/export.py
index f5eb487045b0..74fa67c99e32 100644
--- a/export.py
+++ b/export.py
@@ -21,6 +21,7 @@
 """

 import argparse
+import json
 import os
 import subprocess
 import sys
@@ -54,7 +55,9 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:'

         f = file.with_suffix('.torchscript.pt')
         ts = torch.jit.trace(model, im, strict=False)
-        (optimize_for_mobile(ts) if optimize else ts).save(f)
+        dict = {"im_shape": im.shape, "stride": int(max(model.stride)), "device": next(model.parameters()).device.type}
+        extra_files = {'config.txt': json.dumps(dict)}  # torch._C.ExtraFilesMap()
+        (optimize_for_mobile(ts) if optimize else ts).save(f, _extra_files=extra_files)

         LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
     except Exception as e:
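A note on patch 57: _extra_files rides metadata inside the TorchScript archive itself. A self-contained round-trip sketch (toy module and filename, not from the repo):

    import json
    import torch

    ts = torch.jit.trace(torch.nn.Linear(1, 1), torch.zeros(1, 1))
    ts.save('toy.torchscript.pt', _extra_files={'config.txt': json.dumps({'stride': 32})})

    files = {'config.txt': ''}  # torch.jit.load fills the values in place
    torch.jit.load('toy.torchscript.pt', _extra_files=files)
    assert json.loads(files['config.txt'])['stride'] == 32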
From 358d9e3c75809e1c7e04e07d093a7ce9dc3c1920 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 8 Nov 2021 00:03:41 +0100
Subject: [PATCH 58/63] Add TorchScript

---
 detect.py        |  6 +++---
 export.py        |  4 ++--
 models/common.py | 14 ++++++++++++--
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/detect.py b/detect.py
index 2da09edc78ae..108f8f138052 100644
--- a/detect.py
+++ b/detect.py
@@ -77,7 +77,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     # Load model
     device = select_device(device)
     model = DetectMultiBackend(weights, device=device, dnn=dnn)
-    stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
+    stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx
     imgsz = check_img_size(imgsz, s=stride)  # check image size

     # Half
@@ -89,10 +89,10 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     if webcam:
         view_img = check_imshow()
         cudnn.benchmark = True  # set True to speed up constant image size inference
-        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)
         bs = len(dataset)  # batch_size
     else:
-        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit)
         bs = 1  # batch_size
     vid_path, vid_writer = [None] * bs, [None] * bs

diff --git a/export.py b/export.py
index 74fa67c99e32..4cf30e34fc7b 100644
--- a/export.py
+++ b/export.py
@@ -55,8 +55,8 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:'

         f = file.with_suffix('.torchscript.pt')
         ts = torch.jit.trace(model, im, strict=False)
-        dict = {"im_shape": im.shape, "stride": int(max(model.stride)), "device": next(model.parameters()).device.type}
-        extra_files = {'config.txt': json.dumps(dict)}  # torch._C.ExtraFilesMap()
+        d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
+        extra_files = {'config.txt': json.dumps(d)}  # torch._C.ExtraFilesMap()
         (optimize_for_mobile(ts) if optimize else ts).save(f, _extra_files=extra_files)

         LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')

diff --git a/models/common.py b/models/common.py
index ee2fa6f89988..2acb17318996 100644
--- a/models/common.py
+++ b/models/common.py
@@ -3,6 +3,7 @@
 Common modules
 """

+import json
 import logging
 import math
 import platform
@@ -283,8 +284,17 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
         pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
+        jit = pt and 'torchscript' in w.lower()
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
-        if pt:  # PyTorch
+
+        if jit:  # TorchScript
+            LOGGER.info(f'Loading {w} for TorchScript inference...')
+            extra_files = {'config.txt': ''}  # model metadata
+            model = torch.jit.load(w, _extra_files=extra_files)
+            if extra_files['config.txt']:
+                d = json.loads(extra_files['config.txt'])  # extra_files dict
+                stride, names = int(d['stride']), d['names']
+        elif pt:  # PyTorch
             from models.experimental import attempt_load  # scoped to avoid circular import
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
@@ -332,7 +342,7 @@ def wrap_frozen_graph(gd, inputs, outputs):

     def forward(self, im, augment=False, visualize=False, val=False):
         # YOLOv5 MultiBackend inference
         if self.pt:  # PyTorch
-            y = self.model(im, augment=augment, visualize=visualize)
+            y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
         elif self.onnx:  # ONNX
             im = im.cpu().numpy()  # torch to numpy
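A note on patch 58: a traced TorchScript graph is fixed-shape, so auto=pt and not jit disables rectangular letterboxing and pads every image to the full square imgsz. The per-side stride rounding that auto=True would otherwise apply, sketched:

    import math

    def stride_multiple(x, s=32):  # what stride-constrained letterboxing enforces per side
        return math.ceil(x / s) * s

    print(stride_multiple(633))  # 640; with auto=False the shape is fixed at trace time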
From dc0b748e0ef23a681c5305e3bff54fb561c11d1e Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 8 Nov 2021 00:13:36 +0100
Subject: [PATCH 59/63] Add CoreML

---
 models/common.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/models/common.py b/models/common.py
index 2acb17318996..9982a8d85c22 100644
--- a/models/common.py
+++ b/models/common.py
@@ -22,7 +22,7 @@

 from utils.datasets import exif_transpose, letterbox
 from utils.general import (check_requirements, check_suffix, colorstr, increment_path, make_divisible,
-                           non_max_suppression, scale_coords, xyxy2xywh)
+                           non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
 from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import time_sync

@@ -281,9 +281,9 @@ class DetectMultiBackend(nn.Module):
     def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
-        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
+        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '', '.mlmodel']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
-        pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
+        pt, onnx, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
         jit = pt and 'torchscript' in w.lower()
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults

@@ -299,6 +299,9 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+        elif coreml:  # CoreML *.mlmodel
+            import coremltools as ct
+            model = ct.models.MLModel(w)
         elif dnn:  # ONNX OpenCV DNN
             LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
             check_requirements(('opencv-python>=4.5.4',))
@@ -341,9 +344,15 @@ def wrap_frozen_graph(gd, inputs, outputs):

     def forward(self, im, augment=False, visualize=False, val=False):
         # YOLOv5 MultiBackend inference
+        b, ch, h, w = im.shape  # batch, channel, height, width
         if self.pt:  # PyTorch
             y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
+        elif self.coreml:  # CoreML *.mlmodel
+            y = self.model.predict({'image': im})  # coordinates are xywh normalized
+            box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
+            conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
+            y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
         elif self.onnx:  # ONNX
             im = im.cpu().numpy()  # torch to numpy
             if self.dnn:  # ONNX OpenCV DNN
@@ -352,7 +361,7 @@ def forward(self, im, augment=False, visualize=False, val=False):
             else:  # ONNX Runtime
                 y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
-            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,640,640,3)
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,h=640,w=640,3)
             if self.pb:
                 y = self.frozen_func(x=self.tf.constant(im)).numpy()
             elif self.saved_model:
@@ -369,10 +378,10 @@ def forward(self, im, augment=False, visualize=False, val=False):
                 if int8:
                     scale, zero_point = output['quantization']
                     y = (y.astype(np.float32) - zero_point) * scale  # re-scale
-            y[..., 0] *= im.shape[2]  # x
-            y[..., 1] *= im.shape[1]  # y
-            y[..., 2] *= im.shape[2]  # w
-            y[..., 3] *= im.shape[1]  # h
+            y[..., 0] *= w  # x
+            y[..., 1] *= h  # y
+            y[..., 2] *= w  # w
+            y[..., 3] *= h  # h
             y = torch.tensor(y)
         return (y, []) if val else y

From 0bfaba5d87dd2c479c176403279204e4e33a93a5 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 8 Nov 2021 00:39:58 +0100
Subject: [PATCH 60/63] CoreML cleanup

---
 detect.py        | 2 +-
 models/common.py | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/detect.py b/detect.py
index 108f8f138052..cb3c4abd6425 100644
--- a/detect.py
+++ b/detect.py
@@ -203,7 +203,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)

 def parse_opt():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.mlmodel', help='model path(s)')
     parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
     parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
     parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')

diff --git a/models/common.py b/models/common.py
index 9982a8d85c22..6fa7bdb33622 100644
--- a/models/common.py
+++ b/models/common.py
@@ -349,6 +349,9 @@ def forward(self, im, augment=False, visualize=False, val=False):
             y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
         elif self.coreml:  # CoreML *.mlmodel
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
+            im = Image.fromarray((im[0] * 255).astype('uint8'))
+            # im = im.resize((192, 320), Image.ANTIALIAS)
             y = self.model.predict({'image': im})  # coordinates are xywh normalized
             box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
             conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
@@ -361,7 +364,7 @@ def forward(self, im, augment=False, visualize=False, val=False):
             else:  # ONNX Runtime
                 y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
-            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,h=640,w=640,3)
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
             if self.pb:
                 y = self.frozen_func(x=self.tf.constant(im)).numpy()
             elif self.saved_model:
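A note on patches 59-60: CoreML returns xywh boxes normalized to [0, 1], so they are scaled by [[w, h, w, h]] and converted to corner format. A worked sketch of that conversion (mirrors utils.general.xywh2xyxy):

    import numpy as np

    def xywh2xyxy(x):  # center x, center y, width, height -> x1, y1, x2, y2
        y = np.copy(x)
        y[:, 0] = x[:, 0] - x[:, 2] / 2
        y[:, 1] = x[:, 1] - x[:, 3] / 2
        y[:, 2] = x[:, 0] + x[:, 2] / 2
        y[:, 3] = x[:, 1] + x[:, 3] / 2
        return y

    h = w = 640
    print(xywh2xyxy(np.array([[0.5, 0.5, 0.2, 0.4]]) * [[w, h, w, h]]))
    # -> [[256. 192. 384. 448.]]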
From cd92e01d0eaf49a2e69aeca72a5b6761265263d2 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Tue, 9 Nov 2021 10:56:44 +0100
Subject: [PATCH 61/63] revert default to pt

---
 detect.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/detect.py b/detect.py
index cb3c4abd6425..108f8f138052 100644
--- a/detect.py
+++ b/detect.py
@@ -203,7 +203,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)

 def parse_opt():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.mlmodel', help='model path(s)')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
     parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
     parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
     parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')

From ffa76ee5b4f6ab82ec4975d1aaf37fb31dc117f9 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Tue, 9 Nov 2021 11:06:12 +0100
Subject: [PATCH 62/63] Add Usage examples

---
 models/common.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/models/common.py b/models/common.py
index 0a68c6270f8e..3ea7ba5477a6 100644
--- a/models/common.py
+++ b/models/common.py
@@ -274,8 +274,17 @@ def forward(self, x):

 class DetectMultiBackend(nn.Module):
-    # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN
+    # YOLOv5 MultiBackend class for python inference on various backends
     def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
+        # Usage:
+        #   PyTorch:          weights = *.pt
+        #   TorchScript:                *.torchscript.pt
+        #   CoreML:                     *.mlmodel
+        #   TensorFlow:                 *_saved_model
+        #   TensorFlow:                 *.pb
+        #   TensorFlow Lite:            *.tflite
+        #   ONNX Runtime:               *.onnx
+        #   OpenCV DNN:                 *.onnx with dnn=True
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '', '.mlmodel']
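A note on patch 62: with the usage table in place, backend selection is driven purely by the weights suffix. A hedged usage sketch (the weights files are assumed to exist locally):

    from models.common import DetectMultiBackend
    from utils.torch_utils import select_device

    device = select_device('')  # CUDA device 0 if available, else CPU
    model = DetectMultiBackend('yolov5s.pt', device=device)         # PyTorch
    # model = DetectMultiBackend('yolov5s.onnx', device=device)     # ONNX (OpenCV DNN with dnn=True)
    # model = DetectMultiBackend('yolov5s.mlmodel', device=device)  # CoreML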
From 0f98f01686e8a732ee168bcec67c622c2f542f46 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Tue, 9 Nov 2021 16:14:41 +0100
Subject: [PATCH 63/63] Cleanup val

---
 val.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/val.py b/val.py
index baefa79bbfbe..2bcbc582a500 100644
--- a/val.py
+++ b/val.py
@@ -124,8 +124,8 @@ def run(data,

         # Load model
         model = DetectMultiBackend(weights, device=device, dnn=dnn)
-        stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
-        imgsz = check_img_size(imgsz, s=model.stride)  # check image size
+        stride, pt = model.stride, model.pt
+        imgsz = check_img_size(imgsz, s=stride)  # check image size
         half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
         if pt:
             model.model.half() if half else model.model.float()
@@ -151,7 +151,7 @@ def run(data,
             model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
         pad = 0.0 if task == 'speed' else 0.5
         task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
-        dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=model.pt,
+        dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=pt,
                                        prefix=colorstr(f'{task}: '))[0]

     seen = 0