From b90e8bfcd0521274c2bb45b5a288e30d626512b3 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:16:26 +0100
Subject: [PATCH 01/63] New `DetectMultiBackend()` class

---
 detect.py            | 124 ++++++-----------------------
 models/common.py     |  94 +++++++++++++++++++++++++++-
 utils/general.py     |   3 +-
 utils/torch_utils.py |  20 -------
 4 files changed, 118 insertions(+), 123 deletions(-)

diff --git a/detect.py b/detect.py
index 661a0b86bc99..7080f83497fe 100644
--- a/detect.py
+++ b/detect.py
@@ -14,12 +14,10 @@
 import argparse
 import os
-import platform
 import sys
 from pathlib import Path
 
 import cv2
-import numpy as np
 import torch
 import torch.backends.cudnn as cudnn
 
@@ -29,13 +27,12 @@
 sys.path.append(str(ROOT))  # add ROOT to PATH
 ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
 
-from models.experimental import attempt_load
+from models.common import DetectMultiBackend
 from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
-from utils.general import (LOGGER, apply_classifier, check_file, check_img_size, check_imshow, check_requirements,
-                           check_suffix, colorstr, increment_path, non_max_suppression, print_args, scale_coords,
-                           strip_optimizer, xyxy2xywh)
+from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr,
+                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
 from utils.plots import Annotator, colors, save_one_box
-from utils.torch_utils import load_classifier, select_device, time_sync
+from utils.torch_utils import select_device, time_sync
 
 
 @torch.no_grad()
@@ -82,55 +79,9 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     half &= device.type != 'cpu'  # half precision only supported on CUDA
 
     # Load model
-    w = str(weights[0] if isinstance(weights, list) else weights)
-    classify, suffix, suffixes = False, Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
-    check_suffix(w, suffixes)  # check weights have acceptable suffix
-    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
-    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
-    if pt:
-        model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
-        stride = int(model.stride.max())  # model stride
-        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-        if half:
-            model.half()  # to FP16
-        if classify:  # second-stage classifier
-            modelc = load_classifier(name='resnet50', n=2)  # initialize
-            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()
-    elif onnx:
-        if dnn:
-            check_requirements(('opencv-python>=4.5.4',))
-            net = cv2.dnn.readNetFromONNX(w)
-        else:
-            check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
-            import onnxruntime
-            session = onnxruntime.InferenceSession(w, None)
-    else:  # TensorFlow models
-        import tensorflow as tf
-        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
-            def wrap_frozen_graph(gd, inputs, outputs):
-                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped import
-                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
-                               tf.nest.map_structure(x.graph.as_graph_element, outputs))
-
-            graph_def = tf.Graph().as_graph_def()
-            graph_def.ParseFromString(open(w, 'rb').read())
-            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
-        elif saved_model:
-            model = tf.keras.models.load_model(w)
-        elif tflite:
-            if "edgetpu" in w:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
-                import tflite_runtime.interpreter as tflri
-                delegate = {'Linux': 'libedgetpu.so.1',  # install libedgetpu https://coral.ai/software/#edgetpu-runtime
-                            'Darwin': 'libedgetpu.1.dylib',
-                            'Windows': 'edgetpu.dll'}[platform.system()]
-                interpreter = tflri.Interpreter(model_path=w, experimental_delegates=[tflri.load_delegate(delegate)])
-            else:
-                interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
-            interpreter.allocate_tensors()  # allocate
-            input_details = interpreter.get_input_details()  # inputs
-            output_details = interpreter.get_output_details()  # outputs
-            int8 = input_details[0]['dtype'] == np.uint8  # is TFLite quantized uint8 model
-    imgsz = check_img_size(imgsz, s=stride)  # check image size
+    model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn)
+    stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
+    imgsz = check_img_size(imgsz, s=model.stride)  # check image size
 
     # Dataloader
     if webcam:
@@ -145,52 +96,24 @@ def wrap_frozen_graph(gd, inputs, outputs):
 
     # Run inference
     if pt and device.type != 'cpu':
-        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
+        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # run once
     dt, seen = [0.0, 0.0, 0.0], 0
-    for path, img, im0s, vid_cap, s in dataset:
+    for path, im, im0s, vid_cap, s in dataset:
         t1 = time_sync()
         if onnx:
-            img = img.astype('float32')
+            im = im.astype('float32')
         else:
-            img = torch.from_numpy(img).to(device)
-            img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255  # 0 - 255 to 0.0 - 1.0
-        if len(img.shape) == 3:
-            img = img[None]  # expand for batch dim
+            im = torch.from_numpy(im).to(device)
+            im = im.half() if half else im.float()  # uint8 to fp16/32
+        im /= 255  # 0 - 255 to 0.0 - 1.0
+        if len(im.shape) == 3:
+            im = im[None]  # expand for batch dim
         t2 = time_sync()
         dt[0] += t2 - t1
 
         # Inference
-        if pt:
-            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
-            pred = model(img, augment=augment, visualize=visualize)[0]
-        elif onnx:
-            if dnn:
-                net.setInput(img)
-                pred = torch.tensor(net.forward())
-            else:
-                pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
-        else:  # tensorflow model (tflite, pb, saved_model)
-            imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
-            if pb:
-                pred = frozen_func(x=tf.constant(imn)).numpy()
-            elif saved_model:
-                pred = model(imn, training=False).numpy()
-            elif tflite:
-                if int8:
-                    scale, zero_point = input_details[0]['quantization']
-                    imn = (imn / scale + zero_point).astype(np.uint8)  # de-scale
-                interpreter.set_tensor(input_details[0]['index'], imn)
-                interpreter.invoke()
-                pred = interpreter.get_tensor(output_details[0]['index'])
-                if int8:
-                    scale, zero_point = output_details[0]['quantization']
-                    pred = (pred.astype(np.float32) - zero_point) * scale  # re-scale
-            pred[..., 0] *= imgsz[1]  # x
-            pred[..., 1] *= imgsz[0]  # y
-            pred[..., 2] *= imgsz[1]  # w
-            pred[..., 3] *= imgsz[0]  # h
-            pred = torch.tensor(pred)
+        visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
+        pred = model(im, augment=augment, visualize=visualize)
         t3 = time_sync()
         dt[1] += t3 - t2
 
@@ -199,8 +122,7 @@ def wrap_frozen_graph(gd, inputs, outputs):
         dt[2] += time_sync() - t3
 
         # Second-stage classifier (optional)
-        if classify:
-            pred = apply_classifier(pred, modelc, img, im0s)
+        # pred = apply_classifier(pred, classifier_model, im, im0s)
 
         # Process predictions
         for i, det in enumerate(pred):  # per image
@@ -212,15 +134,15 @@ def wrap_frozen_graph(gd, inputs, outputs):
             p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)
 
         p = Path(p)  # to Path
-        save_path = str(save_dir / p.name)  # img.jpg
-        txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
-        s += '%gx%g ' % img.shape[2:]  # print string
+        save_path = str(save_dir / p.name)  # im.jpg
+        txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
+        s += '%gx%g ' % im.shape[2:]  # print string
         gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
         imc = im0.copy() if save_crop else im0  # for save_crop
         annotator = Annotator(im0, line_width=line_thickness, example=str(names))
         if len(det):
             # Rescale boxes from img_size to im0 size
-            det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
+            det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()
 
             # Print results
             for c in det[:, -1].unique():
@@ -282,7 +204,7 @@ def wrap_frozen_graph(gd, inputs, outputs):
 
 def parse_opt():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pb', help='model path(s)')
     parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
     parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
     parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
diff --git a/models/common.py b/models/common.py
index 8035ef11a791..7f1ecfe0ee66 100644
--- a/models/common.py
+++ b/models/common.py
@@ -5,10 +5,12 @@
 
 import logging
 import math
+import platform
 import warnings
 from copy import copy
 from pathlib import Path
 
+import cv2
 import numpy as np
 import pandas as pd
 import requests
@@ -18,7 +20,8 @@
 from torch.cuda import amp
 
 from utils.datasets import exif_transpose, letterbox
-from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, scale_coords, xyxy2xywh
+from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, scale_coords, xyxy2xywh, \
+    check_suffix, check_requirements
 from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import time_sync
 
@@ -272,6 +275,95 @@ def forward(self, x):
         return torch.cat(x, self.d)
 
 
+class DetectMultiBackend(nn.Module):
+    def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False):
+        super().__init__()
+        # Load model
+        w = str(weights[0] if isinstance(weights, list) else weights)
+        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
+        check_suffix(w, suffixes)  # check weights have acceptable suffix
+        pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
+        stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
+        if pt:
+            from models.experimental import attempt_load
+            model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
+            stride = int(model.stride.max())  # model stride
+            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+            if half:
+                model.half()  # to FP16
+        elif onnx:
+            if dnn:  # OpenCV DNN
+                check_requirements(('opencv-python>=4.5.4',))
+                net = cv2.dnn.readNetFromONNX(w)
+            else:  # ONNX Runtime
+                check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
+                import onnxruntime
+                session = onnxruntime.InferenceSession(w, None)
+        else:  # TensorFlow model (TFLite, pb, saved_model)
+            import tensorflow as tf
+            if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
+                def wrap_frozen_graph(gd, inputs, outputs):
+                    x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
+                    return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
+                                   tf.nest.map_structure(x.graph.as_graph_element, outputs))
+
+                graph_def = tf.Graph().as_graph_def()
+                graph_def.ParseFromString(open(w, 'rb').read())
+                frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
+            elif saved_model:
+                model = tf.keras.models.load_model(w)
+            elif tflite:
+                if "edgetpu" in w:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
+                    import tflite_runtime.interpreter as tflri
+                    delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
+                                'Darwin': 'libedgetpu.1.dylib',
+                                'Windows': 'edgetpu.dll'}[platform.system()]
+                    interpreter = tflri.Interpreter(model_path=w,
+                                                    experimental_delegates=[tflri.load_delegate(delegate)])
+                else:
+                    interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
+                interpreter.allocate_tensors()  # allocate
+                input_details = interpreter.get_input_details()  # inputs
+                output_details = interpreter.get_output_details()  # outputs
+
+        self.__dict__.update(locals())  # all all variables to self
+
+    def forward(self, im, augment=False, profile=False, visualize=False):
+        # Inference
+        if self.pt:
+            y = self.model(im, augment=augment, visualize=visualize)[0]
+        elif self.onnx:
+            if self.dnn:  # OpenCV DNN
+                self.net.setInput(im)
+                y = self.net.forward()
+            else:  # ONNX Runtime
+                y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
+        else:  # TensorFlow model (TFLite, pb, saved_model)
+            import tensorflow as tf
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,640,640,3)
+            if self.pb:
+                y = self.frozen_func(x=tf.constant(im)).numpy()
+            elif self.saved_model:
+                y = self.model(im, training=False).numpy()
+            elif self.tflite:
+                input, output = self.input_details[0], self.output_details[0]
+                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
+                if int8:
+                    scale, zero_point = input['quantization']
+                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
+                self.interpreter.set_tensor(input['index'], im)
+                self.interpreter.invoke()
+                y = self.interpreter.get_tensor(output['index'])
+                if int8:
+                    scale, zero_point = output['quantization']
+                    y = (y.astype(np.float32) - zero_point) * scale  # re-scale
+            y[..., 0] *= im.shape[2]  # x
+            y[..., 1] *= im.shape[1]  # y
+            y[..., 2] *= im.shape[2]  # w
+            y[..., 3] *= im.shape[1]  # h
+        return y if self.pt else torch.tensor(y)
+
+
 class AutoShape(nn.Module):
     # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
     conf = 0.25  # NMS confidence threshold
diff --git a/utils/general.py b/utils/general.py
index 0f45d72498fe..86c7d90c8220 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -785,7 +785,8 @@ def print_mutation(results, hyp, save_dir, bucket):
 
 
 def apply_classifier(x, model, img, im0):
-    # Apply a second stage classifier to yolo outputs
+    # Apply a second stage classifier to YOLO outputs
+    # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval()
     im0 = [im0] if isinstance(im0, np.ndarray) else im0
     for i, d in enumerate(x):  # per image
         if d is not None and len(d):
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index b36e98d0b656..d0f143b1a30b 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -18,7 +18,6 @@
 import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-import torchvision
 
 from utils.general import LOGGER
 
@@ -237,25 +236,6 @@ def model_info(model, verbose=False, img_size=640):
     LOGGER.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
 
 
-def load_classifier(name='resnet101', n=2):
-    # Loads a pretrained model reshaped to n-class output
-    model = torchvision.models.__dict__[name](pretrained=True)
-
-    # ResNet model properties
-    # input_size = [3, 224, 224]
-    # input_space = 'RGB'
-    # input_range = [0, 1]
-    # mean = [0.485, 0.456, 0.406]
-    # std = [0.229, 0.224, 0.225]
-
-    # Reshape output to n classes
-    filters = model.fc.weight.shape[1]
-    model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
-    model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
-    model.fc.out_features = n
-    return model
-
-
 def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
     # scales img(bs,3,y,x) by ratio constrained to gs-multiple
     if ratio == 1.0:

From 0d9bc34be4141e85034c9b1924103d07499f6a2f Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Sun, 7 Nov 2021 13:17:56 +0000
Subject: [PATCH 02/63] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 models/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/models/common.py b/models/common.py
index 7f1ecfe0ee66..b0606ddca897 100644
--- a/models/common.py
+++ b/models/common.py
@@ -20,8 +20,8 @@
 from torch.cuda import amp
 
 from utils.datasets import exif_transpose, letterbox
-from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, scale_coords, xyxy2xywh, \
-    check_suffix, check_requirements
+from utils.general import (check_requirements, check_suffix, colorstr, increment_path, make_divisible,
+                           non_max_suppression, scale_coords, xyxy2xywh)
 from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import time_sync
 

From 795fbe86baa2dd822f3a62be5dcd2e0d33ba70c6 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:20:08 +0100
Subject: [PATCH 03/63] pb to pt fix

---
 detect.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/detect.py b/detect.py
index 7080f83497fe..f5ec42e612e2 100644
--- a/detect.py
+++ b/detect.py
@@ -204,7 +204,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
 
 def parse_opt():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pb', help='model path(s)')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
    parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
     parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
     parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
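Note: after the three patches above, the new class can be exercised standalone. A minimal usage sketch, illustrative rather than part of the patch series, assuming a YOLOv5 checkout with these patches applied and a local yolov5s.pt:

import torch
from models.common import DetectMultiBackend
from utils.general import check_img_size
from utils.torch_utils import select_device

device = select_device('')  # CUDA if available, else CPU
model = DetectMultiBackend('yolov5s.pt', device=device, half=False, dnn=False)  # .pt/.onnx/.tflite/.pb/saved_model
stride, names = model.stride, model.names  # non-PyTorch backends fall back to stride 64 and 'class0'...'class999'
imgsz = check_img_size([640, 640], s=stride)  # verify size is a multiple of stride
im = torch.zeros(1, 3, *imgsz).to(device)  # dummy BCHW input scaled 0.0-1.0
pred = model(im)  # PyTorch backend returns y[0]; other backends return torch.tensor(y)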
From 9e1d0a6fd9108bb3f367202cad0a04c9ccebf1a8 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:34:27 +0100
Subject: [PATCH 04/63] Cleanup

---
 models/common.py | 44 ++++++++++++++++++++------------------------
 1 file changed, 20 insertions(+), 24 deletions(-)

diff --git a/models/common.py b/models/common.py
index b0606ddca897..5f1b05e88fe9 100644
--- a/models/common.py
+++ b/models/common.py
@@ -277,28 +277,27 @@ def forward(self, x):
 
 class DetectMultiBackend(nn.Module):
     def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False):
+        # MultiBackend model load
         super().__init__()
-        # Load model
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
         pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
         if pt:
-            from models.experimental import attempt_load
+            from models.experimental import attempt_load  # scoped to avoid circular import
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
             if half:
                 model.half()  # to FP16
-        elif onnx:
-            if dnn:  # OpenCV DNN
-                check_requirements(('opencv-python>=4.5.4',))
-                net = cv2.dnn.readNetFromONNX(w)
-            else:  # ONNX Runtime
-                check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
-                import onnxruntime
-                session = onnxruntime.InferenceSession(w, None)
+        elif dnn:  # ONNX OpenCV DNN
+            check_requirements(('opencv-python>=4.5.4',))
+            net = cv2.dnn.readNetFromONNX(w)
+        elif onnx:  # ONNX Runtime
+            check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
+            import onnxruntime
+            session = onnxruntime.InferenceSession(w, None)
         else:  # TensorFlow model (TFLite, pb, saved_model)
             import tensorflow as tf
             if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
@@ -325,24 +324,21 @@ def wrap_frozen_graph(gd, inputs, outputs):
                 interpreter.allocate_tensors()  # allocate
                 input_details = interpreter.get_input_details()  # inputs
                 output_details = interpreter.get_output_details()  # outputs
+        self.__dict__.update(locals())  # assign all variables to self
 
-        self.__dict__.update(locals())  # all all variables to self
-
-    def forward(self, im, augment=False, profile=False, visualize=False):
-        # Inference
+    def forward(self, im, augment=False, visualize=False):
+        # MultiBackend inference
         if self.pt:
-            y = self.model(im, augment=augment, visualize=visualize)[0]
-        elif self.onnx:
-            if self.dnn:  # OpenCV DNN
-                self.net.setInput(im)
-                y = self.net.forward()
-            else:  # ONNX Runtime
-                y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
+            return self.model(im, augment=augment, visualize=visualize)[0]
+        elif self.dnn:  # ONNX OpenCV DNN
+            self.net.setInput(im)
+            y = self.net.forward()
+        elif self.onnx:  # ONNX Runtime
+            y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
-            import tensorflow as tf
             im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,640,640,3)
             if self.pb:
-                y = self.frozen_func(x=tf.constant(im)).numpy()
+                y = self.frozen_func(x=self.tf.constant(im)).numpy()
             elif self.saved_model:
                 y = self.model(im, training=False).numpy()
             elif self.tflite:
@@ -361,7 +357,7 @@ def forward(self, im, augment=False, profile=False, visualize=False):
             y[..., 1] *= im.shape[1]  # y
             y[..., 2] *= im.shape[2]  # w
             y[..., 3] *= im.shape[1]  # h
-        return y if self.pt else torch.tensor(y)
+        return torch.tensor(y)

From 4f775882510b3e390f20cc8f380808091cb880ea Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:36:57 +0100
Subject: [PATCH 05/63] explicit apply_classifier path

---
 detect.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/detect.py b/detect.py
index f5ec42e612e2..b93bc521c332 100644
--- a/detect.py
+++ b/detect.py
@@ -122,7 +122,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
         dt[2] += time_sync() - t3
 
         # Second-stage classifier (optional)
-        # pred = apply_classifier(pred, classifier_model, im, im0s)
+        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)
 
         # Process predictions
         for i, det in enumerate(pred):  # per image

From 96c9462825469de16438af278ceb334193aa8930 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:40:19 +0100
Subject: [PATCH 06/63] Cleanup2

---
 models/common.py | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/models/common.py b/models/common.py
index 5f1b05e88fe9..2ea7088530e9 100644
--- a/models/common.py
+++ b/models/common.py
@@ -312,13 +312,12 @@ def wrap_frozen_graph(gd, inputs, outputs):
         elif saved_model:
             model = tf.keras.models.load_model(w)
         elif tflite:
-            if "edgetpu" in w:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
-                import tflite_runtime.interpreter as tflri
+            if 'edgetpu' in w:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
+                import tflite_runtime.interpreter as tfli
                 delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
                             'Darwin': 'libedgetpu.1.dylib',
                             'Windows': 'edgetpu.dll'}[platform.system()]
-                interpreter = tflri.Interpreter(model_path=w,
-                                                experimental_delegates=[tflri.load_delegate(delegate)])
+                interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
             else:
                 interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
             interpreter.allocate_tensors()  # allocate

From 398d37788828f8b9b0c9e65016b98f6652af2627 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:41:32 +0100
Subject: [PATCH 07/63] Cleanup3

---
 models/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/models/common.py b/models/common.py
index 2ea7088530e9..7359043cb805 100644
--- a/models/common.py
+++ b/models/common.py
@@ -311,8 +311,8 @@ def wrap_frozen_graph(gd, inputs, outputs):
             frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
         elif saved_model:
             model = tf.keras.models.load_model(w)
-        elif tflite:
-            if 'edgetpu' in w:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
+        elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
+            if 'edgetpu' in w.lower():
                 import tflite_runtime.interpreter as tfli
                 delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
                             'Darwin': 'libedgetpu.1.dylib',

From 89bf2f1bf4ee8cbf17038bf94348f2a93ca9c33a Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:45:06 +0100
Subject: [PATCH 08/63] Cleanup4

---
 models/common.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/models/common.py b/models/common.py
index 7359043cb805..b309705da9b0 100644
--- a/models/common.py
+++ b/models/common.py
@@ -276,8 +276,8 @@ def forward(self, x):
 
 
 class DetectMultiBackend(nn.Module):
+    # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN
     def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False):
-        # MultiBackend model load
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
@@ -326,7 +326,7 @@ def wrap_frozen_graph(gd, inputs, outputs):
         self.__dict__.update(locals())  # assign all variables to self
 
     def forward(self, im, augment=False, visualize=False):
-        # MultiBackend inference
+        # YOLOv5 MultiBackend inference
         if self.pt:
             return self.model(im, augment=augment, visualize=visualize)[0]
         elif self.dnn:  # ONNX OpenCV DNN

From 47550b03baf225f826e27ddba5dd5c26b1f48b80 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 14:47:41 +0100
Subject: [PATCH 09/63] Cleanup5

---
 detect.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/detect.py b/detect.py
index b93bc521c332..a216ba2a59ca 100644
--- a/detect.py
+++ b/detect.py
@@ -81,7 +81,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     # Load model
     model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn)
     stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
-    imgsz = check_img_size(imgsz, s=model.stride)  # check image size
+    imgsz = check_img_size(imgsz, s=stride)  # check image size
 
     # Dataloader
     if webcam:
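For reference, the suffix check at the top of __init__ is what routes a weights path to a backend. A condensed illustration, using a hypothetical file name:

from pathlib import Path

suffixes = ['.pt', '.onnx', '.tflite', '.pb', '']  # '' matches TensorFlow saved_model directories
suffix = Path('yolov5s.onnx').suffix.lower()  # '.onnx'
pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
print(onnx)  # True, so ONNX Runtime is used (or OpenCV DNN when dnn=True)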
From d08b356c72b70c5c48865c72fe4a02b86706fd97 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 15:33:48 +0100
Subject: [PATCH 10/63] Cleanup6

---
 models/common.py | 32 +++++++++++++++++++++-----------
 1 file changed, 21 insertions(+), 11 deletions(-)

diff --git a/models/common.py b/models/common.py
index b309705da9b0..a050521aa8e0 100644
--- a/models/common.py
+++ b/models/common.py
@@ -277,14 +277,14 @@ def forward(self, x):
 
 class DetectMultiBackend(nn.Module):
     # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN
-    def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False):
+    def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=True):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
         pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
-        if pt:
+        if pt:  # PyTorch
             from models.experimental import attempt_load  # scoped to avoid circular import
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
             if half:
                 model.half()  # to FP16
         elif dnn:  # ONNX OpenCV DNN
+            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
             check_requirements(('opencv-python>=4.5.4',))
             net = cv2.dnn.readNetFromONNX(w)
         elif onnx:  # ONNX Runtime
+            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
             check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
             import onnxruntime
             session = onnxruntime.InferenceSession(w, None)
@@ -306,34 +308,41 @@ def wrap_frozen_graph(gd, inputs, outputs):
                     return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
                                    tf.nest.map_structure(x.graph.as_graph_element, outputs))
 
+                LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...')
                 graph_def = tf.Graph().as_graph_def()
                 graph_def.ParseFromString(open(w, 'rb').read())
                 frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
             elif saved_model:
+                LOGGER.info(f'Loading {w} for TensorFlow saved_model inference...')
                 model = tf.keras.models.load_model(w)
             elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
                 if 'edgetpu' in w.lower():
+                    LOGGER.info(f'Loading {w} for TensorFlow Edge TPU inference...')
                     import tflite_runtime.interpreter as tfli
                     delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
                                 'Darwin': 'libedgetpu.1.dylib',
                                 'Windows': 'edgetpu.dll'}[platform.system()]
                     interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
                 else:
+                    LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
                     interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
                 interpreter.allocate_tensors()  # allocate
                 input_details = interpreter.get_input_details()  # inputs
                 output_details = interpreter.get_output_details()  # outputs
         self.__dict__.update(locals())  # assign all variables to self
 
-    def forward(self, im, augment=False, visualize=False):
+    def forward(self, im, augment=False, visualize=False, val=False):
         # YOLOv5 MultiBackend inference
-        if self.pt:
-            return self.model(im, augment=augment, visualize=visualize)[0]
-        elif self.dnn:  # ONNX OpenCV DNN
-            self.net.setInput(im)
-            y = self.net.forward()
-        elif self.onnx:  # ONNX Runtime
-            y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
+        if self.pt:  # PyTorch
+            y = self.model(im, augment=augment, visualize=visualize)
+            return y if val else y[0]
+        elif self.onnx:  # ONNX
+            im = np.array(im)
+            if self.dnn:  # ONNX OpenCV DNN
+                self.net.setInput(im)
+                y = self.net.forward()
+            else:  # ONNX Runtime
+                y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
             im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,640,640,3)
             if self.pb:
                 y = self.frozen_func(x=self.tf.constant(im)).numpy()
             elif self.saved_model:
                 y = self.model(im, training=False).numpy()
             elif self.tflite:
                 input, output = self.input_details[0], self.output_details[0]
                 int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
                 if int8:
                     scale, zero_point = input['quantization']
                     im = (im / scale + zero_point).astype(np.uint8)  # de-scale
                 self.interpreter.set_tensor(input['index'], im)
                 self.interpreter.invoke()
                 y = self.interpreter.get_tensor(output['index'])
                 if int8:
                     scale, zero_point = output['quantization']
                     y = (y.astype(np.float32) - zero_point) * scale  # re-scale
             y[..., 0] *= im.shape[2]  # x
             y[..., 1] *= im.shape[1]  # y
             y[..., 2] *= im.shape[2]  # w
             y[..., 3] *= im.shape[1]  # h
-        return torch.tensor(y)
+        y = torch.tensor(y)
+        return (y, []) if val else y

From 407d5d367aaa89b83ca977fa36d961ff1e9fda3c Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 15:45:55 +0100
Subject: [PATCH 11/63] val.py MultiBackend inference

---
 val.py | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/val.py b/val.py
index 2118ad400ac7..6fd96e5ee4f7 100644
--- a/val.py
+++ b/val.py
@@ -23,10 +23,10 @@
 sys.path.append(str(ROOT))  # add ROOT to PATH
 ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
 
-from models.experimental import attempt_load
+from models.common import DetectMultiBackend
 from utils.callbacks import Callbacks
 from utils.datasets import create_dataloader
-from utils.general import (LOGGER, box_iou, check_dataset, check_img_size, check_requirements, check_suffix, check_yaml,
+from utils.general import (LOGGER, box_iou, check_dataset, check_img_size, check_requirements, check_yaml,
                            coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args,
                            scale_coords, xywh2xyxy, xyxy2xywh)
 from utils.metrics import ConfusionMatrix, ap_per_class
@@ -100,6 +100,7 @@ def run(data,
         name='exp',  # save to project/name
         exist_ok=False,  # existing project/name ok, do not increment
         half=True,  # use FP16 half-precision inference
+        dnn=False,  # use OpenCV DNN for ONNX inference
         model=None,
         dataloader=None,
         save_dir=Path(''),
@@ -120,10 +121,12 @@ def run(data,
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
 
     # Load model
-    check_suffix(weights, '.pt')
-    model = attempt_load(weights, map_location=device)  # load FP32 model
-    gs = max(int(model.stride.max()), 32)  # grid size (max stride)
-    imgsz = check_img_size(imgsz, s=gs)  # check image size
+    model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn)
+    stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
+    imgsz = check_img_size(imgsz, s=model.stride)  # check image size
+    if not model.pt:
+        LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
+        batch_size = 1  # export.py models default to batch-size 1
 
     # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
     # if device.type != 'cpu' and torch.cuda.device_count() > 1:
     #     model = nn.DataParallel(model)
@@ -146,10 +149,10 @@ def run(data,
     # Dataloader
     if not training:
         if device.type != 'cpu':
-            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
+            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters())))  # run once
         pad = 0.0 if task == 'speed' else 0.5
         task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
-        dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=pad, rect=True,
+        dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=model.pt,
                                        prefix=colorstr(f'{task}: '))[0]
 
     seen = 0
@@ -171,7 +174,7 @@ def run(data,
         dt[0] += t2 - t1
 
         # Run model
-        out, train_out = model(img, augment=augment)  # inference and training outputs
+        out, train_out = model(img, augment=augment, val=True)  # inference and training outputs
         dt[1] += time_sync() - t2
 
         # Compute loss
@@ -318,6 +321,7 @@ def parse_opt():
     parser.add_argument('--name', default='exp', help='save to project/name')
     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
     parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
+    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
     opt = parser.parse_args()
     opt.data = check_yaml(opt.data)  # check YAML
     opt.save_json |= opt.data.endswith('coco.yaml')
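With PATCH 11 the same wrapper drives validation, so exported models can be scored directly. A sketch of the programmatic entry point, assuming an exported yolov5s.onnx and the bundled coco128.yaml; note the forced batch size 1 for non-PyTorch backends logged above:

import val  # YOLOv5 val.py

results = val.run(data='data/coco128.yaml',
                  weights='yolov5s.onnx',  # suffix selects the backend
                  imgsz=640,
                  half=False,  # FP16 is a PyTorch-on-CUDA feature only
                  dnn=False)  # True would route ONNX through OpenCV DNN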
From 71b320aa836aa86706ac89ff255181c76d301bea Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 15:53:30 +0100
Subject: [PATCH 12/63] warmup fix

---
 detect.py | 2 +-
 val.py    | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/detect.py b/detect.py
index a216ba2a59ca..a110dd5c63ad 100644
--- a/detect.py
+++ b/detect.py
@@ -96,7 +96,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
 
     # Run inference
     if pt and device.type != 'cpu':
-        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # run once
+        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
     dt, seen = [0.0, 0.0, 0.0], 0
     for path, im, im0s, vid_cap, s in dataset:
         t1 = time_sync()
diff --git a/val.py b/val.py
index 6fd96e5ee4f7..95fcbeea4eb5 100644
--- a/val.py
+++ b/val.py
@@ -148,8 +148,8 @@ def run(data,
 
     # Dataloader
     if not training:
-        if device.type != 'cpu':
-            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters())))  # run once
+        if model.pt and device.type != 'cpu':
+            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
         pad = 0.0 if task == 'speed' else 0.5
         task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
         dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=model.pt,

From 293e98da9117529516ca75c7cacaff6ff8306fd8 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 15:57:51 +0100
Subject: [PATCH 13/63] to device fix

---
 val.py | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/val.py b/val.py
index 95fcbeea4eb5..cf148cf867f0 100644
--- a/val.py
+++ b/val.py
@@ -163,18 +163,21 @@ def run(data,
     dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0
     loss = torch.zeros(3, device=device)
     jdict, stats, ap, ap_class = [], [], [], []
-    for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
+    for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
         t1 = time_sync()
-        img = img.to(device, non_blocking=True)
-        img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255  # 0 - 255 to 0.0 - 1.0
+        if model.onnx:
+            im = np.array(im).astype('float32')
+        else:
+            im = im.to(device, non_blocking=True)
+            im = im.half() if half else im.float()  # uint8 to fp16/32
+        im /= 255  # 0 - 255 to 0.0 - 1.0
         targets = targets.to(device)
-        nb, _, height, width = img.shape  # batch size, channels, height, width
+        nb, _, height, width = im.shape  # batch size, channels, height, width
         t2 = time_sync()
         dt[0] += t2 - t1
 
         # Run model
-        out, train_out = model(img, augment=augment, val=True)  # inference and training outputs
+        out, train_out = model(im, augment=augment, val=True)  # inference and training outputs
         dt[1] += time_sync() - t2
 
         # Compute loss
@@ -205,12 +208,12 @@ def run(data,
             if single_cls:
                 pred[:, 5] = 0
             predn = pred.clone()
-            scale_coords(img[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
+            scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
 
             # Evaluate
             if nl:
                 tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
-                scale_coords(img[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
+                scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
                 labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
                 correct = process_batch(predn, labelsn, iouv)
                 if plots:
@@ -224,14 +227,14 @@ def run(data,
                 save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt'))
             if save_json:
                 save_one_json(predn, jdict, path, class_map)  # append to COCO-JSON dictionary
            callbacks.run('on_val_image_end', pred, predn, path, names, im[si])
 
         # Plot images
         if plots and batch_i < 3:
             f = save_dir / f'val_batch{batch_i}_labels.jpg'  # labels
-            Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
+            Thread(target=plot_images, args=(im, targets, paths, f, names), daemon=True).start()
             f = save_dir / f'val_batch{batch_i}_pred.jpg'  # predictions
-            Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()
+            Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start()
 
     # Compute statistics
     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy

From 201107ebc305327674cddf99811e89ba4b3085cd Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:00:01 +0100
Subject: [PATCH 14/63] pt fix

---
 val.py | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/val.py b/val.py
index cf148cf867f0..5e466d4f208b 100644
--- a/val.py
+++ b/val.py
@@ -111,7 +111,7 @@ def run(data,
     # Initialize/load model and set device
     training = model is not None
     if training:  # called by train.py
-        device = next(model.parameters()).device  # get model device
+        device, pt = next(model.parameters()).device, True  # get model device, PyTorch model
 
     else:  # called directly
         device = select_device(device, batch_size=batch_size)
@@ -165,13 +165,11 @@ def run(data,
     jdict, stats, ap, ap_class = [], [], [], []
     for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
         t1 = time_sync()
-        if model.onnx:
-            im = np.array(im).astype('float32')
-        else:
+        if pt:
             im = im.to(device, non_blocking=True)
             im = im.half() if half else im.float()  # uint8 to fp16/32
+            targets = targets.to(device)
         im /= 255  # 0 - 255 to 0.0 - 1.0
-        targets = targets.to(device)
         nb, _, height, width = im.shape  # batch size, channels, height, width
         t2 = time_sync()
         dt[0] += t2 - t1

From a7f17e940c5db085f862ea4abdbcc8f8faea0e96 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:11:15 +0100
Subject: [PATCH 15/63] device fix

---
 val.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/val.py b/val.py
index 5e466d4f208b..8337a901f972 100644
--- a/val.py
+++ b/val.py
@@ -127,6 +127,7 @@ def run(data,
     if not model.pt:
         LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
         batch_size = 1  # export.py models default to batch-size 1
+        device = torch.device('cpu')
 
     # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
     # if device.type != 'cpu' and torch.cuda.device_count() > 1:
@@ -169,6 +170,8 @@ def run(data,
             im = im.to(device, non_blocking=True)
             im = im.half() if half else im.float()  # uint8 to fp16/32
             targets = targets.to(device)
+        else:
+            im = im.numpy().astype('float32')
         im /= 255  # 0 - 255 to 0.0 - 1.0
         nb, _, height, width = im.shape  # batch size, channels, height, width
         t2 = time_sync()
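Patches 13-15 settle what the val.py batch loop feeds each backend: device tensors for PyTorch, CPU float32 numpy arrays otherwise. A condensed restatement of the loop body at this point in the series, as an illustrative sketch rather than a new hunk:

def preprocess_batch(im, targets, pt, device, half):
    # im arrives as a batched uint8 tensor from the dataloader
    if pt:
        im = im.to(device, non_blocking=True)
        im = im.half() if half else im.float()  # uint8 to fp16/32
        targets = targets.to(device)
    else:
        im = im.numpy().astype('float32')  # exported backends stay on CPU numpy
    im /= 255  # 0 - 255 to 0.0 - 1.0
    return im, targets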
From 3cf44c32c3fc283c494414e23a4e63342d395d3d Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:18:15 +0100
Subject: [PATCH 16/63] Val cleanup

---
 val.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/val.py b/val.py
index 8337a901f972..c25fb4b4b9c8 100644
--- a/val.py
+++ b/val.py
@@ -137,7 +137,7 @@ def run(data,
     data = check_dataset(data)  # check
 
     # Half
-    half &= device.type != 'cpu'  # half precision only supported on CUDA
+    half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
     model.half() if half else model.float()
 
     # Configure
@@ -149,7 +149,7 @@ def run(data,
 
     # Dataloader
     if not training:
-        if model.pt and device.type != 'cpu':
+        if pt and device.type != 'cpu':
             model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
         pad = 0.0 if task == 'speed' else 0.5
         task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
         dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=model.pt,
@@ -166,12 +166,12 @@ def run(data,
     jdict, stats, ap, ap_class = [], [], [], []
     for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
         t1 = time_sync()
+        im = im.half() if half else im.float()  # uint8 to fp16/32
         if pt:
             im = im.to(device, non_blocking=True)
-            im = im.half() if half else im.float()  # uint8 to fp16/32
             targets = targets.to(device)
-        else:
-            im = im.numpy().astype('float32')
+        # else:
+        #     im = im.numpy().astype('float32')
         im /= 255  # 0 - 255 to 0.0 - 1.0
         nb, _, height, width = im.shape  # batch size, channels, height, width
         t2 = time_sync()

From d32ca2e8c28ac8574312551372b5b7555a2173a2 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:21:47 +0100
Subject: [PATCH 17/63] COCO128 URL to assets

---
 data/coco128.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/data/coco128.yaml b/data/coco128.yaml
index b1dfb004afa1..84a91b18359d 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -27,4 +27,4 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't
 
 
 # Download script/URL (optional)
-download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
+download: https://ultralytics.com/assets/coco128.zip

From 5f3a5fb45d44ecf2f801f0e53688d4a11e38c9c2 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:41:24 +0100
Subject: [PATCH 18/63] half fix

---
 detect.py        | 8 +++-----
 models/common.py | 4 ++--
 val.py           | 6 ++----
 3 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/detect.py b/detect.py
index a110dd5c63ad..34b2095a789b 100644
--- a/detect.py
+++ b/detect.py
@@ -79,7 +79,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     half &= device.type != 'cpu'  # half precision only supported on CUDA
 
     # Load model
-    model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn)
+    model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn)
     stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
     imgsz = check_img_size(imgsz, s=stride)  # check image size
 
@@ -100,11 +100,9 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     dt, seen = [0.0, 0.0, 0.0], 0
     for path, im, im0s, vid_cap, s in dataset:
         t1 = time_sync()
-        if onnx:
-            im = im.astype('float32')
-        else:
+        if pt:
             im = torch.from_numpy(im).to(device)
-        im = im.half() if half else im.float()  # uint8 to fp16/32
+            im = im.half() if half else im.float()  # uint8 to fp16/32
         im /= 255  # 0 - 255 to 0.0 - 1.0
         if len(im.shape) == 3:
             im = im[None]  # expand for batch dim
diff --git a/models/common.py b/models/common.py
index a050521aa8e0..9fa83a091434 100644
--- a/models/common.py
+++ b/models/common.py
@@ -277,7 +277,7 @@ def forward(self, x):
 
 class DetectMultiBackend(nn.Module):
     # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN
-    def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=True):
+    def __init__(self, weights='yolov5s.pt', device=None, pt_half=False, dnn=True):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
@@ -289,7 +289,7 @@ def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=True):
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-            if half:
+            if pt_half:
                 model.half()  # to FP16
         elif dnn:  # ONNX OpenCV DNN
diff --git a/val.py b/val.py
index c25fb4b4b9c8..d87b4e34f167 100644
--- a/val.py
+++ b/val.py
@@ -121,7 +121,7 @@ def run(data,
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
 
     # Load model
-    model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn)
+    model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn)
     stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
     imgsz = check_img_size(imgsz, s=model.stride)  # check image size
     if not pt:
@@ -166,12 +166,10 @@ def run(data,
     jdict, stats, ap, ap_class = [], [], [], []
     for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)):
         t1 = time_sync()
-        im = im.half() if half else im.float()  # uint8 to fp16/32
         if pt:
             im = im.to(device, non_blocking=True)
             targets = targets.to(device)
-        # else:
-        #     im = im.numpy().astype('float32')
+        im = im.half() if half else im.float()  # uint8 to fp16/32
         im /= 255  # 0 - 255 to 0.0 - 1.0
         nb, _, height, width = im.shape  # batch size, channels, height, width
         t2 = time_sync()

From 901566269d047767a60609b9d2b273f8b1dcaf92 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:47:58 +0100
Subject: [PATCH 19/63] detect fix

---
 detect.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/detect.py b/detect.py
index 34b2095a789b..19102c77ce8d 100644
--- a/detect.py
+++ b/detect.py
@@ -102,7 +102,9 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
         t1 = time_sync()
         if pt:
             im = torch.from_numpy(im).to(device)
-        im = im.half() if half else im.float()  # uint8 to fp16/32
+            im = im.half() if half else im.float()  # uint8 to fp16/32
+        else:
+            im = im.astype('float32')
         im /= 255  # 0 - 255 to 0.0 - 1.0
         if len(im.shape) == 3:
             im = im[None]  # expand for batch dim

From dc5c37012f0bf99c4452e397ccdc751c93229b4e Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 16:55:38 +0100
Subject: [PATCH 20/63] detect fix 2

---
 detect.py        | 7 ++-----
 models/common.py | 2 +-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/detect.py b/detect.py
index 19102c77ce8d..8cb8e469b20a 100644
--- a/detect.py
+++ b/detect.py
@@ -100,11 +100,8 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     dt, seen = [0.0, 0.0, 0.0], 0
     for path, im, im0s, vid_cap, s in dataset:
         t1 = time_sync()
-        if pt:
-            im = torch.from_numpy(im).to(device)
-            im = im.half() if half else im.float()  # uint8 to fp16/32
-        else:
-            im = im.astype('float32')
+        im = torch.from_numpy(im).to(device)
+        im = im.half() if half else im.float()  # uint8 to fp16/32
         im /= 255  # 0 - 255 to 0.0 - 1.0
         if len(im.shape) == 3:
             im = im[None]  # expand for batch dim
diff --git a/models/common.py b/models/common.py
index 7032dc3be296..ee2fa6f89988 100644
--- a/models/common.py
+++ b/models/common.py
@@ -337,7 +337,7 @@ def forward(self, im, augment=False, visualize=False, val=False):
             y = self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
         elif self.onnx:  # ONNX
-            im = np.array(im)
+            im = im.cpu().numpy()  # torch to numpy
             if self.dnn:  # ONNX OpenCV DNN
                 self.net.setInput(im)
                 y = self.net.forward()

From 77fbc8f1708d5c33ff4fb0cd5cf38a7ff47e2f5d Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:05:19 +0100
Subject: [PATCH 21/63] remove half from DetectMultiBackend

---
 detect.py        | 11 ++++++-----
 models/common.py |  4 +---
 val.py           |  4 ++--
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/detect.py b/detect.py
index 8cb8e469b20a..865104c32504 100644
--- a/detect.py
+++ b/detect.py
@@ -74,15 +74,16 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
 
-    # Initialize
-    device = select_device(device)
-    half &= device.type != 'cpu'  # half precision only supported on CUDA
-
     # Load model
-    model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn)
+    device = select_device(device)
+    model = DetectMultiBackend(weights, device=device, dnn=dnn)
     stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
     imgsz = check_img_size(imgsz, s=stride)  # check image size
 
+    # Half
+    half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+    model.half() if half else model.float()
+
     # Dataloader
     if webcam:
         view_img = check_imshow()
diff --git a/models/common.py b/models/common.py
index 7032dc3be296..ee2fa6f89988 100644
--- a/models/common.py
+++ b/models/common.py
@@ -277,7 +277,7 @@ def forward(self, x):
 
 class DetectMultiBackend(nn.Module):
     # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN
-    def __init__(self, weights='yolov5s.pt', device=None, pt_half=False, dnn=True):
+    def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
@@ -289,8 +289,6 @@ def __init__(self, weights='yolov5s.pt', device=None, pt_half=False, dnn=True):
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-            if pt_half:
-                model.half()  # to FP16
         elif dnn:  # ONNX OpenCV DNN
             LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
             check_requirements(('opencv-python>=4.5.4',))
diff --git a/val.py b/val.py
index d87b4e34f167..ded19e7b0daf 100644
--- a/val.py
+++ b/val.py
@@ -121,10 +121,10 @@ def run(data,
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
 
     # Load model
-    model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn)
+    model = DetectMultiBackend(weights, device=device, dnn=dnn)
     stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
     imgsz = check_img_size(imgsz, s=model.stride)  # check image size
     if not pt:
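With FP16 handling removed from the constructor, casting becomes the caller's responsibility. The next few patches iterate on the details; condensed, the pattern the series converges on looks like this (illustrative sketch only, see the 'training half handling' and 'Cleanup' patches that follow):

import torch
from models.common import DetectMultiBackend
from utils.torch_utils import select_device

device = select_device('')
model = DetectMultiBackend('yolov5s.pt', device=device, dnn=False)
half = model.pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
if model.pt:
    model.model.half() if half else model.model.float()  # cast the wrapped module, not the wrapper
im = torch.zeros(1, 3, 640, 640).to(device)
im = im.half() if half else im.float()  # input dtype must match the model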
training: # called by train.py device, pt = next(model.parameters()).device, True # get model device, PyTorch model + half &= pt and device.type != 'cpu' # half precision only supported on CUDA + model.half() if half else model.float() else: # called directly device = select_device(device, batch_size=batch_size) @@ -129,17 +131,12 @@ def run(data, batch_size = 1 # export.py models default to batch-size 1 device = torch.device('cpu') - # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 - # if device.type != 'cpu' and torch.cuda.device_count() > 1: - # model = nn.DataParallel(model) + half &= pt and device.type != 'cpu' # half precision only supported on CUDA + model.model.half() if half else model.model.float() # Data data = check_dataset(data) # check - # Half - half &= pt and device.type != 'cpu' # half precision only supported on CUDA - model.half() if half else model.float() - # Configure model.eval() is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt') # COCO dataset From e165bd4367fcf33949a9feea5c9771d9b2ad2858 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 17:15:20 +0100 Subject: [PATCH 23/63] training half handling 2 --- val.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/val.py b/val.py index 4195f2a296fe..39c03a499059 100644 --- a/val.py +++ b/val.py @@ -113,7 +113,7 @@ def run(data, if training: # called by train.py device, pt = next(model.parameters()).device, True # get model device, PyTorch model - half &= pt and device.type != 'cpu' # half precision only supported on CUDA + half &= device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() else: # called directly device = select_device(device, batch_size=batch_size) @@ -126,14 +126,14 @@ def run(data, model = DetectMultiBackend(weights, device=device, dnn=dnn) stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx imgsz = check_img_size(imgsz, s=model.stride) # check image size - if not pt: + if pt: + half &= device.type != 'cpu' # half precision only supported on CUDA + model.model.half() if half else model.model.float() + else: LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends') batch_size = 1 # export.py models default to batch-size 1 device = torch.device('cpu') - half &= pt and device.type != 'cpu' # half precision only supported on CUDA - model.model.half() if half else model.model.float() - # Data data = check_dataset(data) # check From c743bb645a3b83e5f04daee3f084a9bc467acddd Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 17:23:34 +0100 Subject: [PATCH 24/63] training half handling 3 --- detect.py | 3 ++- val.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/detect.py b/detect.py index 865104c32504..bee873405b2d 100644 --- a/detect.py +++ b/detect.py @@ -82,7 +82,8 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) # Half half &= pt and device.type != 'cpu' # half precision only supported on CUDA - model.half() if half else model.float() + if pt: + model.model.half() if half else model.model.float() # Dataloader if webcam: diff --git a/val.py b/val.py index 39c03a499059..1df03f21776a 100644 --- a/val.py +++ b/val.py @@ -126,13 +126,14 @@ def run(data, model = DetectMultiBackend(weights, device=device, dnn=dnn) stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx imgsz = check_img_size(imgsz, 
s=model.stride) # check image size + half &= pt and device.type != 'cpu' # half precision only supported on CUDA if pt: - half &= device.type != 'cpu' # half precision only supported on CUDA model.model.half() if half else model.model.float() else: - LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends') + half = False batch_size = 1 # export.py models default to batch-size 1 device = torch.device('cpu') + LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends') # Data data = check_dataset(data) # check From f312ab63d5dafe2c309a00fb9eb7d3028c861d5f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 17:29:51 +0100 Subject: [PATCH 25/63] Cleanup --- detect.py | 2 +- val.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/detect.py b/detect.py index bee873405b2d..2da09edc78ae 100644 --- a/detect.py +++ b/detect.py @@ -81,7 +81,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) imgsz = check_img_size(imgsz, s=stride) # check image size # Half - half &= pt and device.type != 'cpu' # half precision only supported on CUDA + half &= pt and device.type != 'cpu' # half precision only supported by PyTorch on CUDA if pt: model.model.half() if half else model.model.float() diff --git a/val.py b/val.py index 1df03f21776a..da25017621cb 100644 --- a/val.py +++ b/val.py @@ -126,7 +126,7 @@ def run(data, model = DetectMultiBackend(weights, device=device, dnn=dnn) stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx imgsz = check_img_size(imgsz, s=model.stride) # check image size - half &= pt and device.type != 'cpu' # half precision only supported on CUDA + half &= pt and device.type != 'cpu' # half precision only supported by PyTorch on CUDA if pt: model.model.half() if half else model.model.float() else: From bdde9ef30ebac88aad4df0ce5f635d4178f0c2df Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 17:35:33 +0100 Subject: [PATCH 26/63] Fix CI error --- val.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/val.py b/val.py index da25017621cb..09a662d9c722 100644 --- a/val.py +++ b/val.py @@ -173,22 +173,22 @@ def run(data, t2 = time_sync() dt[0] += t2 - t1 - # Run model - out, train_out = model(im, augment=augment, val=True) # inference and training outputs + # Inference + out, train_out = model(im) if training else model(im, augment=augment, val=True) # inference, loss outputs dt[1] += time_sync() - t2 - # Compute loss + # Loss if compute_loss: loss += compute_loss([x.float() for x in train_out], targets)[1] # box, obj, cls - # Run NMS + # NMS targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device) # to pixels lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else [] # for autolabelling t3 = time_sync() out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls) dt[2] += time_sync() - t3 - # Statistics per image + # Metrics for si, pred in enumerate(out): labels = targets[targets[:, 0] == si, 1:] nl = len(labels) @@ -233,7 +233,7 @@ def run(data, f = save_dir / f'val_batch{batch_i}_pred.jpg' # predictions Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start() - # Compute statistics + # Compute metrics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy if len(stats) and stats[0].any(): p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, 
From bdde9ef30ebac88aad4df0ce5f635d4178f0c2df Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:35:33 +0100
Subject: [PATCH 26/63] Fix CI error

---
 val.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/val.py b/val.py
index da25017621cb..09a662d9c722 100644
--- a/val.py
+++ b/val.py
@@ -173,22 +173,22 @@ def run(data,
         t2 = time_sync()
         dt[0] += t2 - t1
 
-        # Run model
-        out, train_out = model(im, augment=augment, val=True)  # inference and training outputs
+        # Inference
+        out, train_out = model(im) if training else model(im, augment=augment, val=True)  # inference, loss outputs
         dt[1] += time_sync() - t2
 
-        # Compute loss
+        # Loss
         if compute_loss:
             loss += compute_loss([x.float() for x in train_out], targets)[1]  # box, obj, cls
 
-        # Run NMS
+        # NMS
         targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
         lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
         t3 = time_sync()
         out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
         dt[2] += time_sync() - t3
 
-        # Statistics per image
+        # Metrics
         for si, pred in enumerate(out):
             labels = targets[targets[:, 0] == si, 1:]
             nl = len(labels)
@@ -233,7 +233,7 @@ def run(data,
             f = save_dir / f'val_batch{batch_i}_pred.jpg'  # predictions
             Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start()
 
-    # Compute statistics
+    # Compute metrics
     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
     if len(stats) and stats[0].any():
         p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)

From 70039397cab8515d55ea998bb25ff4a2d6f7c95e Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 23:43:01 +0100
Subject: [PATCH 27/63] Add torchscript _extra_files

---
 export.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/export.py b/export.py
index f5eb487045b0..74fa67c99e32 100644
--- a/export.py
+++ b/export.py
@@ -21,6 +21,7 @@
 """
 
 import argparse
+import json
 import os
 import subprocess
 import sys
@@ -54,7 +55,9 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:'
 
         f = file.with_suffix('.torchscript.pt')
         ts = torch.jit.trace(model, im, strict=False)
-        (optimize_for_mobile(ts) if optimize else ts).save(f)
+        dict = {"im_shape": im.shape, "stride": int(max(model.stride)), "device": next(model.parameters()).device.type}
+        extra_files = {'config.txt': json.dumps(dict)}  # torch._C.ExtraFilesMap()
+        (optimize_for_mobile(ts) if optimize else ts).save(f, _extra_files=extra_files)
 
         LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
     except Exception as e:
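
Patch 27 piggybacks arbitrary metadata on the TorchScript archive through `_extra_files`, and the next patch reads it back at load time. A self-contained round-trip of that mechanism; the toy module, file name and metadata values are placeholders, not from the patch:

import json
import torch
import torch.nn as nn

m = nn.Sequential(nn.Conv2d(3, 8, 3))  # toy stand-in for a YOLOv5 model
ts = torch.jit.trace(m, torch.zeros(1, 3, 64, 64), strict=False)
meta = {'shape': [1, 3, 64, 64], 'stride': 32, 'names': ['class0']}
ts.save('toy.torchscript.pt', _extra_files={'config.txt': json.dumps(meta)})  # embed metadata

extra_files = {'config.txt': ''}  # keys to extract; values are filled in place
m2 = torch.jit.load('toy.torchscript.pt', _extra_files=extra_files)
d = json.loads(extra_files['config.txt'])
print(d['stride'], d['names'])  # -> 32 ['class0']
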
From ef3f161ef25fc447173e37e8b5905aa5d35505b1 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 8 Nov 2021 00:03:41 +0100
Subject: [PATCH 28/63] Add TorchScript

---
 detect.py        |  6 +++---
 export.py        |  4 ++--
 models/common.py | 14 ++++++++++++--
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/detect.py b/detect.py
index 2da09edc78ae..108f8f138052 100644
--- a/detect.py
+++ b/detect.py
@@ -77,7 +77,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     # Load model
     device = select_device(device)
     model = DetectMultiBackend(weights, device=device, dnn=dnn)
-    stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
+    stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx
     imgsz = check_img_size(imgsz, s=stride)  # check image size
 
     # Half
@@ -89,10 +89,10 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     if webcam:
         view_img = check_imshow()
         cudnn.benchmark = True  # set True to speed up constant image size inference
-        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)
         bs = len(dataset)  # batch_size
     else:
-        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit)
         bs = 1  # batch_size
     vid_path, vid_writer = [None] * bs, [None] * bs
 
diff --git a/export.py b/export.py
index 74fa67c99e32..4cf30e34fc7b 100644
--- a/export.py
+++ b/export.py
@@ -55,8 +55,8 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:'
 
         f = file.with_suffix('.torchscript.pt')
         ts = torch.jit.trace(model, im, strict=False)
-        dict = {"im_shape": im.shape, "stride": int(max(model.stride)), "device": next(model.parameters()).device.type}
-        extra_files = {'config.txt': json.dumps(dict)}  # torch._C.ExtraFilesMap()
+        d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
+        extra_files = {'config.txt': json.dumps(d)}  # torch._C.ExtraFilesMap()
         (optimize_for_mobile(ts) if optimize else ts).save(f, _extra_files=extra_files)
 
         LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
diff --git a/models/common.py b/models/common.py
index ee2fa6f89988..2acb17318996 100644
--- a/models/common.py
+++ b/models/common.py
@@ -3,6 +3,7 @@
 Common modules
 """
 
+import json
 import logging
 import math
 import platform
@@ -283,8 +284,17 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
         pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
+        jit = pt and 'torchscript' in w.lower()
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
-        if pt:  # PyTorch
+
+        if jit:  # TorchScript
+            LOGGER.info(f'Loading {w} for TorchScript inference...')
+            extra_files = {'config.txt': ''}  # model metadata
+            model = torch.jit.load(w, _extra_files=extra_files)
+            if extra_files['config.txt']:
+                d = json.loads(extra_files['config.txt'])  # extra_files dict
+                stride, names = int(d['stride']), d['names']
+        elif pt:  # PyTorch
             from models.experimental import attempt_load  # scoped to avoid circular import
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
@@ -332,7 +342,7 @@ def wrap_frozen_graph(gd, inputs, outputs):
 
     def forward(self, im, augment=False, visualize=False, val=False):
         # YOLOv5 MultiBackend inference
         if self.pt:  # PyTorch
-            y = self.model(im, augment=augment, visualize=visualize)
+            y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
         elif self.onnx:  # ONNX
             im = im.cpu().numpy()  # torch to numpy
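
With TorchScript split out from plain PyTorch checkpoints, backend dispatch is still just a filename test. A minimal reproduction of the selection logic above; the helper name is illustrative:

from pathlib import Path

def backend_flags(w):
    suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
    jit = pt and 'torchscript' in w.lower()
    return pt, jit, onnx, tflite, pb, saved_model

print(backend_flags('yolov5s.torchscript.pt'))  # (True, True, False, False, False, False)
print(backend_flags('yolov5s.onnx'))            # (False, False, True, False, False, False)
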
From 82bfd0f315da0434cb8effd34ad79fa3cb84a378 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 8 Nov 2021 00:13:36 +0100
Subject: [PATCH 29/63] Add CoreML

---
 models/common.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/models/common.py b/models/common.py
index 2acb17318996..9982a8d85c22 100644
--- a/models/common.py
+++ b/models/common.py
@@ -22,7 +22,7 @@
 
 from utils.datasets import exif_transpose, letterbox
 from utils.general import (check_requirements, check_suffix, colorstr, increment_path, make_divisible,
-                           non_max_suppression, scale_coords, xyxy2xywh)
+                           non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
 from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import time_sync
 
@@ -281,9 +281,9 @@ class DetectMultiBackend(nn.Module):
     def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
-        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
+        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '', '.mlmodel']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
-        pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
+        pt, onnx, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
         jit = pt and 'torchscript' in w.lower()
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
 
@@ -299,6 +299,9 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+        elif coreml:  # CoreML *.mlmodel
+            import coremltools as ct
+            model = ct.models.MLModel(w)
         elif dnn:  # ONNX OpenCV DNN
             LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
             check_requirements(('opencv-python>=4.5.4',))
@@ -341,9 +344,15 @@ def wrap_frozen_graph(gd, inputs, outputs):
 
     def forward(self, im, augment=False, visualize=False, val=False):
         # YOLOv5 MultiBackend inference
+        b, ch, h, w = im.shape  # batch, channel, height, width
         if self.pt:  # PyTorch
             y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
+        elif self.coreml:  # CoreML *.mlmodel
+            y = self.model.predict({'image': im})  # coordinates are xywh normalized
+            box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
+            conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
+            y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
         elif self.onnx:  # ONNX
             im = im.cpu().numpy()  # torch to numpy
             if self.dnn:  # ONNX OpenCV DNN
@@ -352,7 +361,7 @@ def forward(self, im, augment=False, visualize=False, val=False):
             else:  # ONNX Runtime
                 y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
-            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,640,640,3)
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,h=640,w=640,3)
             if self.pb:
                 y = self.frozen_func(x=self.tf.constant(im)).numpy()
             elif self.saved_model:
@@ -369,10 +378,10 @@ def forward(self, im, augment=False, visualize=False, val=False):
                 if int8:
                     scale, zero_point = output['quantization']
                     y = (y.astype(np.float32) - zero_point) * scale  # re-scale
-            y[..., 0] *= im.shape[2]  # x
-            y[..., 1] *= im.shape[1]  # y
-            y[..., 2] *= im.shape[2]  # w
-            y[..., 3] *= im.shape[1]  # h
+            y[..., 0] *= w  # x
+            y[..., 1] *= h  # y
+            y[..., 2] *= w  # w
+            y[..., 3] *= h  # h
         y = torch.tensor(y)
         return (y, []) if val else y
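
CoreML returns normalized xywh boxes plus a per-class confidence matrix, which the patch reshapes into the usual (x1, y1, x2, y2, conf, cls) rows. The conversion in isolation, with dummy arrays standing in for MLModel.predict() output:

import numpy as np

def xywh2xyxy(x):  # center-x, center-y, width, height -> corner coordinates
    y = x.copy()
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # x1
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # y1
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # x2
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # y2
    return y

h, w = 320, 192  # inference height and width, as in the patch comments
coords = np.array([[0.5, 0.5, 0.2, 0.4]])  # one detection, xywh normalized
conf = np.array([[0.1, 0.8]])  # confidence for two classes
box = xywh2xyxy(coords * [[w, h, w, h]])  # xyxy pixels
cls = conf.argmax(1).reshape(-1, 1).astype(np.float32)
y = np.concatenate((box, conf.max(1).reshape(-1, 1), cls), 1)
print(y)  # [[ 76.8  96.  115.2 224.    0.8   1. ]]
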
From 109c5d65fecd112623f3b5700ab004f89ada001e Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 8 Nov 2021 00:39:58 +0100
Subject: [PATCH 30/63] CoreML cleanup

---
 detect.py        | 2 +-
 models/common.py | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/detect.py b/detect.py
index 108f8f138052..cb3c4abd6425 100644
--- a/detect.py
+++ b/detect.py
@@ -203,7 +203,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
 
 def parse_opt():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.mlmodel', help='model path(s)')
     parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
     parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
    parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')
diff --git a/models/common.py b/models/common.py
index 9982a8d85c22..6fa7bdb33622 100644
--- a/models/common.py
+++ b/models/common.py
@@ -349,6 +349,9 @@ def forward(self, im, augment=False, visualize=False, val=False):
             y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
         elif self.coreml:  # CoreML *.mlmodel
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
+            im = Image.fromarray((im[0] * 255).astype('uint8'))
+            # im = im.resize((192, 320), Image.ANTIALIAS)
             y = self.model.predict({'image': im})  # coordinates are xywh normalized
             box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
             conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
@@ -361,7 +364,7 @@ def forward(self, im, augment=False, visualize=False, val=False):
             else:  # ONNX Runtime
                 y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
-            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,h=640,w=640,3)
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
             if self.pb:
                 y = self.frozen_func(x=self.tf.constant(im)).numpy()
             elif self.saved_model:
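
The TFLite output re-scaling touched in patch 29 follows a simple quantization contract: uint8 models take inputs mapped into the quantized domain with the tensor's (scale, zero_point) before set_tensor(), and outputs are mapped back afterwards. A pure-numpy sketch with made-up quantization parameters:

import numpy as np

scale, zero_point = 1 / 255, 0  # example (scale, zero_point), not from a real model
im = np.random.rand(1, 320, 192, 3).astype(np.float32)  # BHWC input in [0, 1]

im_q = (im / scale + zero_point).astype(np.uint8)   # float -> uint8 (de-scale)
y = (im_q.astype(np.float32) - zero_point) * scale  # uint8 -> float (re-scale)
print(float(np.abs(y - im).max()) < scale)  # True: error bounded by one quantization step
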
wrap_frozen_graph(gd, inputs, outputs): - x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped import - return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs), - tf.nest.map_structure(x.graph.as_graph_element, outputs)) - - graph_def = tf.Graph().as_graph_def() - graph_def.ParseFromString(open(w, 'rb').read()) - frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") - elif saved_model: - model = tf.keras.models.load_model(w) - elif tflite: - if "edgetpu" in w: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python - import tflite_runtime.interpreter as tflri - delegate = {'Linux': 'libedgetpu.so.1', # install libedgetpu https://coral.ai/software/#edgetpu-runtime - 'Darwin': 'libedgetpu.1.dylib', - 'Windows': 'edgetpu.dll'}[platform.system()] - interpreter = tflri.Interpreter(model_path=w, experimental_delegates=[tflri.load_delegate(delegate)]) - else: - interpreter = tf.lite.Interpreter(model_path=w) # load TFLite model - interpreter.allocate_tensors() # allocate - input_details = interpreter.get_input_details() # inputs - output_details = interpreter.get_output_details() # outputs - int8 = input_details[0]['dtype'] == np.uint8 # is TFLite quantized uint8 model - imgsz = check_img_size(imgsz, s=stride) # check image size + model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn) + stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx + imgsz = check_img_size(imgsz, s=model.stride) # check image size # Dataloader if webcam: @@ -145,52 +96,24 @@ def wrap_frozen_graph(gd, inputs, outputs): # Run inference if pt and device.type != 'cpu': - model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters()))) # run once + model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters()))) # run once dt, seen = [0.0, 0.0, 0.0], 0 - for path, img, im0s, vid_cap, s in dataset: + for path, im, im0s, vid_cap, s in dataset: t1 = time_sync() if onnx: - img = img.astype('float32') + im = im.astype('float32') else: - img = torch.from_numpy(img).to(device) - img = img.half() if half else img.float() # uint8 to fp16/32 - img /= 255 # 0 - 255 to 0.0 - 1.0 - if len(img.shape) == 3: - img = img[None] # expand for batch dim + im = torch.from_numpy(im).to(device) + im = im.half() if half else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 + if len(im.shape) == 3: + im = im[None] # expand for batch dim t2 = time_sync() dt[0] += t2 - t1 # Inference - if pt: - visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False - pred = model(img, augment=augment, visualize=visualize)[0] - elif onnx: - if dnn: - net.setInput(img) - pred = torch.tensor(net.forward()) - else: - pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img})) - else: # tensorflow model (tflite, pb, saved_model) - imn = img.permute(0, 2, 3, 1).cpu().numpy() # image in numpy - if pb: - pred = frozen_func(x=tf.constant(imn)).numpy() - elif saved_model: - pred = model(imn, training=False).numpy() - elif tflite: - if int8: - scale, zero_point = input_details[0]['quantization'] - imn = (imn / scale + zero_point).astype(np.uint8) # de-scale - interpreter.set_tensor(input_details[0]['index'], imn) - interpreter.invoke() - pred = interpreter.get_tensor(output_details[0]['index']) - if int8: - scale, zero_point = output_details[0]['quantization'] - pred = (pred.astype(np.float32) - zero_point) * 
scale # re-scale - pred[..., 0] *= imgsz[1] # x - pred[..., 1] *= imgsz[0] # y - pred[..., 2] *= imgsz[1] # w - pred[..., 3] *= imgsz[0] # h - pred = torch.tensor(pred) + visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False + pred = model(im, augment=augment, visualize=visualize) t3 = time_sync() dt[1] += t3 - t2 @@ -199,8 +122,7 @@ def wrap_frozen_graph(gd, inputs, outputs): dt[2] += time_sync() - t3 # Second-stage classifier (optional) - if classify: - pred = apply_classifier(pred, modelc, img, im0s) + # pred = apply_classifier(pred, classifier_model, im, im0s) # Process predictions for i, det in enumerate(pred): # per image @@ -212,15 +134,15 @@ def wrap_frozen_graph(gd, inputs, outputs): p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0) p = Path(p) # to Path - save_path = str(save_dir / p.name) # img.jpg - txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # img.txt - s += '%gx%g ' % img.shape[2:] # print string + save_path = str(save_dir / p.name) # im.jpg + txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}') # im.txt + s += '%gx%g ' % im.shape[2:] # print string gn = torch.tensor(im0.shape)[[1, 0, 1, 0]] # normalization gain whwh imc = im0.copy() if save_crop else im0 # for save_crop annotator = Annotator(im0, line_width=line_thickness, example=str(names)) if len(det): # Rescale boxes from img_size to im0 size - det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round() + det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round() # Print results for c in det[:, -1].unique(): @@ -282,7 +204,7 @@ def wrap_frozen_graph(gd, inputs, outputs): def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pb', help='model path(s)') parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') diff --git a/models/common.py b/models/common.py index 8035ef11a791..7f1ecfe0ee66 100644 --- a/models/common.py +++ b/models/common.py @@ -5,10 +5,12 @@ import logging import math +import platform import warnings from copy import copy from pathlib import Path +import cv2 import numpy as np import pandas as pd import requests @@ -18,7 +20,8 @@ from torch.cuda import amp from utils.datasets import exif_transpose, letterbox -from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, scale_coords, xyxy2xywh +from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, scale_coords, xyxy2xywh, \ + check_suffix, check_requirements from utils.plots import Annotator, colors, save_one_box from utils.torch_utils import time_sync @@ -272,6 +275,95 @@ def forward(self, x): return torch.cat(x, self.d) +class DetectMultiBackend(nn.Module): + def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False): + super().__init__() + # Load model + w = str(weights[0] if isinstance(weights, list) else weights) + suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', ''] + check_suffix(w, suffixes) # check weights have acceptable suffix + pt, 
onnx, tflite, pb, saved_model = (suffix == x for x in suffixes) # backend booleans + stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults + if pt: + from models.experimental import attempt_load + model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device) + stride = int(model.stride.max()) # model stride + names = model.module.names if hasattr(model, 'module') else model.names # get class names + if half: + model.half() # to FP16 + elif onnx: + if dnn: # OpenCV DNN + check_requirements(('opencv-python>=4.5.4',)) + net = cv2.dnn.readNetFromONNX(w) + else: # ONNX Runtime + check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime')) + import onnxruntime + session = onnxruntime.InferenceSession(w, None) + else: # TensorFlow model (TFLite, pb, saved_model) + import tensorflow as tf + if pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt + def wrap_frozen_graph(gd, inputs, outputs): + x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), []) # wrapped + return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs), + tf.nest.map_structure(x.graph.as_graph_element, outputs)) + + graph_def = tf.Graph().as_graph_def() + graph_def.ParseFromString(open(w, 'rb').read()) + frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") + elif saved_model: + model = tf.keras.models.load_model(w) + elif tflite: + if "edgetpu" in w: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python + import tflite_runtime.interpreter as tflri + delegate = {'Linux': 'libedgetpu.so.1', # install https://coral.ai/software/#edgetpu-runtime + 'Darwin': 'libedgetpu.1.dylib', + 'Windows': 'edgetpu.dll'}[platform.system()] + interpreter = tflri.Interpreter(model_path=w, + experimental_delegates=[tflri.load_delegate(delegate)]) + else: + interpreter = tf.lite.Interpreter(model_path=w) # load TFLite model + interpreter.allocate_tensors() # allocate + input_details = interpreter.get_input_details() # inputs + output_details = interpreter.get_output_details() # outputs + + self.__dict__.update(locals()) # all all variables to self + + def forward(self, im, augment=False, profile=False, visualize=False): + # Inference + if self.pt: + y = self.model(im, augment=augment, visualize=visualize)[0] + elif self.onnx: + if self.dnn: # OpenCV DNN + self.net.setInput(im) + y = self.net.forward() + else: # ONNX Runtime + y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0] + else: # TensorFlow model (TFLite, pb, saved_model) + import tensorflow as tf + im = im.permute(0, 2, 3, 1).cpu().numpy() # TF format (1,640,640,3) + if self.pb: + y = self.frozen_func(x=tf.constant(im)).numpy() + elif self.saved_model: + y = self.model(im, training=False).numpy() + elif self.tflite: + input, output = self.input_details[0], self.output_details[0] + int8 = input['dtype'] == np.uint8 # is TFLite quantized uint8 model + if int8: + scale, zero_point = input['quantization'] + im = (im / scale + zero_point).astype(np.uint8) # de-scale + self.interpreter.set_tensor(input['index'], im) + self.interpreter.invoke() + y = self.interpreter.get_tensor(output['index']) + if int8: + scale, zero_point = output['quantization'] + y = (y.astype(np.float32) - zero_point) * scale # re-scale + y[..., 0] *= im.shape[2] # x + y[..., 1] *= im.shape[1] # y + y[..., 2] *= im.shape[2] # w + y[..., 3] *= im.shape[1] # h + return y if self.pt else 
torch.tensor(y) + + class AutoShape(nn.Module): # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS conf = 0.25 # NMS confidence threshold diff --git a/utils/general.py b/utils/general.py index 0f45d72498fe..86c7d90c8220 100755 --- a/utils/general.py +++ b/utils/general.py @@ -785,7 +785,8 @@ def print_mutation(results, hyp, save_dir, bucket): def apply_classifier(x, model, img, im0): - # Apply a second stage classifier to yolo outputs + # Apply a second stage classifier to YOLO outputs + # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval() im0 = [im0] if isinstance(im0, np.ndarray) else im0 for i, d in enumerate(x): # per image if d is not None and len(d): diff --git a/utils/torch_utils.py b/utils/torch_utils.py index b36e98d0b656..d0f143b1a30b 100644 --- a/utils/torch_utils.py +++ b/utils/torch_utils.py @@ -18,7 +18,6 @@ import torch.distributed as dist import torch.nn as nn import torch.nn.functional as F -import torchvision from utils.general import LOGGER @@ -237,25 +236,6 @@ def model_info(model, verbose=False, img_size=640): LOGGER.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}") -def load_classifier(name='resnet101', n=2): - # Loads a pretrained model reshaped to n-class output - model = torchvision.models.__dict__[name](pretrained=True) - - # ResNet model properties - # input_size = [3, 224, 224] - # input_space = 'RGB' - # input_range = [0, 1] - # mean = [0.485, 0.456, 0.406] - # std = [0.229, 0.224, 0.225] - - # Reshape output to n classes - filters = model.fc.weight.shape[1] - model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True) - model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True) - model.fc.out_features = n - return model - - def scale_img(img, ratio=1.0, same_shape=False, gs=32): # img(16,3,256,416) # scales img(bs,3,y,x) by ratio constrained to gs-multiple if ratio == 1.0: From a9a0fedf2a1f923c37ded3b455d0567d40d7e4a8 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:20:08 +0100 Subject: [PATCH 32/63] pb to pt fix --- detect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detect.py b/detect.py index 7080f83497fe..f5ec42e612e2 100644 --- a/detect.py +++ b/detect.py @@ -204,7 +204,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) def parse_opt(): parser = argparse.ArgumentParser() - parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pb', help='model path(s)') + parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)') parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam') parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w') parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold') From 11bd91c75191bbc66eabcf80ed63ee88ce14d593 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sun, 7 Nov 2021 13:17:56 +0000 Subject: [PATCH 33/63] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- models/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/common.py b/models/common.py index 7f1ecfe0ee66..b0606ddca897 100644 --- a/models/common.py +++ b/models/common.py @@ -20,8 
+20,8 @@ from torch.cuda import amp from utils.datasets import exif_transpose, letterbox -from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, scale_coords, xyxy2xywh, \ - check_suffix, check_requirements +from utils.general import (check_requirements, check_suffix, colorstr, increment_path, make_divisible, + non_max_suppression, scale_coords, xyxy2xywh) from utils.plots import Annotator, colors, save_one_box from utils.torch_utils import time_sync From 07b4289fa4224fa85a56fa6a7c3b7a5f8e6d5bf1 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:34:27 +0100 Subject: [PATCH 34/63] Cleanup --- models/common.py | 44 ++++++++++++++++++++------------------------ 1 file changed, 20 insertions(+), 24 deletions(-) diff --git a/models/common.py b/models/common.py index b0606ddca897..5f1b05e88fe9 100644 --- a/models/common.py +++ b/models/common.py @@ -277,28 +277,27 @@ def forward(self, x): class DetectMultiBackend(nn.Module): def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False): + # MultiBackend model load super().__init__() - # Load model w = str(weights[0] if isinstance(weights, list) else weights) suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', ''] check_suffix(w, suffixes) # check weights have acceptable suffix pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes) # backend booleans stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults if pt: - from models.experimental import attempt_load + from models.experimental import attempt_load # scoped to avoid circular import model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device) stride = int(model.stride.max()) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names if half: model.half() # to FP16 - elif onnx: - if dnn: # OpenCV DNN - check_requirements(('opencv-python>=4.5.4',)) - net = cv2.dnn.readNetFromONNX(w) - else: # ONNX Runtime - check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime')) - import onnxruntime - session = onnxruntime.InferenceSession(w, None) + elif dnn: # ONNX OpenCV DNN + check_requirements(('opencv-python>=4.5.4',)) + net = cv2.dnn.readNetFromONNX(w) + elif onnx: # ONNX Runtime + check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime')) + import onnxruntime + session = onnxruntime.InferenceSession(w, None) else: # TensorFlow model (TFLite, pb, saved_model) import tensorflow as tf if pb: # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt @@ -325,24 +324,21 @@ def wrap_frozen_graph(gd, inputs, outputs): interpreter.allocate_tensors() # allocate input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs + self.__dict__.update(locals()) # assign all variables to self - self.__dict__.update(locals()) # all all variables to self - - def forward(self, im, augment=False, profile=False, visualize=False): - # Inference + def forward(self, im, augment=False, visualize=False): + # MultiBackend inference if self.pt: - y = self.model(im, augment=augment, visualize=visualize)[0] - elif self.onnx: - if self.dnn: # OpenCV DNN - self.net.setInput(im) - y = self.net.forward() - else: # ONNX Runtime - y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0] + return self.model(im, augment=augment, visualize=visualize)[0] + elif self.dnn: # ONNX 
OpenCV DNN + self.net.setInput(im) + y = self.net.forward() + elif self.onnx: # ONNX Runtime + y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0] else: # TensorFlow model (TFLite, pb, saved_model) - import tensorflow as tf im = im.permute(0, 2, 3, 1).cpu().numpy() # TF format (1,640,640,3) if self.pb: - y = self.frozen_func(x=tf.constant(im)).numpy() + y = self.frozen_func(x=self.tf.constant(im)).numpy() elif self.saved_model: y = self.model(im, training=False).numpy() elif self.tflite: @@ -361,7 +357,7 @@ def forward(self, im, augment=False, profile=False, visualize=False): y[..., 1] *= im.shape[1] # y y[..., 2] *= im.shape[2] # w y[..., 3] *= im.shape[1] # h - return y if self.pt else torch.tensor(y) + return torch.tensor(y) class AutoShape(nn.Module): From b6f6c0d9c29976de7dc506bd714666462520c3c9 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:36:57 +0100 Subject: [PATCH 35/63] explicit apply_classifier path --- detect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detect.py b/detect.py index f5ec42e612e2..b93bc521c332 100644 --- a/detect.py +++ b/detect.py @@ -122,7 +122,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) dt[2] += time_sync() - t3 # Second-stage classifier (optional) - # pred = apply_classifier(pred, classifier_model, im, im0s) + # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s) # Process predictions for i, det in enumerate(pred): # per image From 4d85ec297e31f0eb1ecc96365c6011520be3b1a4 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:40:19 +0100 Subject: [PATCH 36/63] Cleanup2 --- models/common.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/models/common.py b/models/common.py index 5f1b05e88fe9..2ea7088530e9 100644 --- a/models/common.py +++ b/models/common.py @@ -312,13 +312,12 @@ def wrap_frozen_graph(gd, inputs, outputs): elif saved_model: model = tf.keras.models.load_model(w) elif tflite: - if "edgetpu" in w: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python - import tflite_runtime.interpreter as tflri + if 'edgetpu' in w: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python + import tflite_runtime.interpreter as tfli delegate = {'Linux': 'libedgetpu.so.1', # install https://coral.ai/software/#edgetpu-runtime 'Darwin': 'libedgetpu.1.dylib', 'Windows': 'edgetpu.dll'}[platform.system()] - interpreter = tflri.Interpreter(model_path=w, - experimental_delegates=[tflri.load_delegate(delegate)]) + interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)]) else: interpreter = tf.lite.Interpreter(model_path=w) # load TFLite model interpreter.allocate_tensors() # allocate From df737a01dd0ee7cb5d97904b5f2be439322f326f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:41:32 +0100 Subject: [PATCH 37/63] Cleanup3 --- models/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/common.py b/models/common.py index 2ea7088530e9..7359043cb805 100644 --- a/models/common.py +++ b/models/common.py @@ -311,8 +311,8 @@ def wrap_frozen_graph(gd, inputs, outputs): frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") elif saved_model: model = tf.keras.models.load_model(w) - elif tflite: - if 'edgetpu' in w: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python + elif tflite: # 
https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python + if 'edgetpu' in w.lower(): import tflite_runtime.interpreter as tfli delegate = {'Linux': 'libedgetpu.so.1', # install https://coral.ai/software/#edgetpu-runtime 'Darwin': 'libedgetpu.1.dylib', From 70e9dfb3621f43547c4551e3689fc4bc20ba4b9f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:45:06 +0100 Subject: [PATCH 38/63] Cleanup4 --- models/common.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/models/common.py b/models/common.py index 7359043cb805..b309705da9b0 100644 --- a/models/common.py +++ b/models/common.py @@ -276,8 +276,8 @@ def forward(self, x): class DetectMultiBackend(nn.Module): + # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False): - # MultiBackend model load super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', ''] @@ -326,7 +326,7 @@ def wrap_frozen_graph(gd, inputs, outputs): self.__dict__.update(locals()) # assign all variables to self def forward(self, im, augment=False, visualize=False): - # MultiBackend inference + # YOLOv5 MultiBackend inference if self.pt: return self.model(im, augment=augment, visualize=visualize)[0] elif self.dnn: # ONNX OpenCV DNN From 9729521164435d041a899c50bd12857e8358c98d Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 14:47:41 +0100 Subject: [PATCH 39/63] Cleanup5 --- detect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/detect.py b/detect.py index b93bc521c332..a216ba2a59ca 100644 --- a/detect.py +++ b/detect.py @@ -81,7 +81,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) # Load model model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn) stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx - imgsz = check_img_size(imgsz, s=model.stride) # check image size + imgsz = check_img_size(imgsz, s=stride) # check image size # Dataloader if webcam: From ab7358f6259e77b8e2a0ec55fd552b42158b3d63 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 15:33:48 +0100 Subject: [PATCH 40/63] Cleanup6 --- models/common.py | 32 +++++++++++++++++++++----------- 1 file changed, 21 insertions(+), 11 deletions(-) diff --git a/models/common.py b/models/common.py index b309705da9b0..a050521aa8e0 100644 --- a/models/common.py +++ b/models/common.py @@ -277,14 +277,14 @@ def forward(self, x): class DetectMultiBackend(nn.Module): # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN - def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False): + def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=True): super().__init__() w = str(weights[0] if isinstance(weights, list) else weights) suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', ''] check_suffix(w, suffixes) # check weights have acceptable suffix pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes) # backend booleans stride, names = 64, [f'class{i}' for i in range(1000)] # assign defaults - if pt: + if pt: # PyTorch from models.experimental import attempt_load # scoped to avoid circular import model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device) stride = int(model.stride.max()) # model stride @@ -292,9 +292,11 @@ def 
__init__(self, weights='yolov5s.pt', device=None, half=False, dnn=False): if half: model.half() # to FP16 elif dnn: # ONNX OpenCV DNN + LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') check_requirements(('opencv-python>=4.5.4',)) net = cv2.dnn.readNetFromONNX(w) elif onnx: # ONNX Runtime + LOGGER.info(f'Loading {w} for ONNX Runtime inference...') check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime')) import onnxruntime session = onnxruntime.InferenceSession(w, None) @@ -306,34 +308,41 @@ def wrap_frozen_graph(gd, inputs, outputs): return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs), tf.nest.map_structure(x.graph.as_graph_element, outputs)) + LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...') graph_def = tf.Graph().as_graph_def() graph_def.ParseFromString(open(w, 'rb').read()) frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0") elif saved_model: + LOGGER.info(f'Loading {w} for TensorFlow saved_model inference...') model = tf.keras.models.load_model(w) elif tflite: # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python if 'edgetpu' in w.lower(): + LOGGER.info(f'Loading {w} for TensorFlow Edge TPU inference...') import tflite_runtime.interpreter as tfli delegate = {'Linux': 'libedgetpu.so.1', # install https://coral.ai/software/#edgetpu-runtime 'Darwin': 'libedgetpu.1.dylib', 'Windows': 'edgetpu.dll'}[platform.system()] interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)]) else: + LOGGER.info(f'Loading {w} for TensorFlow Lite inference...') interpreter = tf.lite.Interpreter(model_path=w) # load TFLite model interpreter.allocate_tensors() # allocate input_details = interpreter.get_input_details() # inputs output_details = interpreter.get_output_details() # outputs self.__dict__.update(locals()) # assign all variables to self - def forward(self, im, augment=False, visualize=False): + def forward(self, im, augment=False, visualize=False, val=False): # YOLOv5 MultiBackend inference - if self.pt: - return self.model(im, augment=augment, visualize=visualize)[0] - elif self.dnn: # ONNX OpenCV DNN - self.net.setInput(im) - y = self.net.forward() - elif self.onnx: # ONNX Runtime - y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0] + if self.pt: # PyTorch + y = self.model(im, augment=augment, visualize=visualize) + return y if val else y[0] + elif self.onnx: # ONNX + im = np.array(im) + if self.dnn: # ONNX OpenCV DNN + self.net.setInput(im) + y = self.net.forward() + else: # ONNX Runtime + y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0] else: # TensorFlow model (TFLite, pb, saved_model) im = im.permute(0, 2, 3, 1).cpu().numpy() # TF format (1,640,640,3) if self.pb: @@ -356,7 +365,8 @@ def forward(self, im, augment=False, visualize=False): y[..., 1] *= im.shape[1] # y y[..., 2] *= im.shape[2] # w y[..., 3] *= im.shape[1] # h - return torch.tensor(y) + y = torch.tensor(y) + return (y, []) if val else y class AutoShape(nn.Module): From c1bf0e2ddb01fa295253861ed7a2bb06a7f460c5 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 15:45:55 +0100 Subject: [PATCH 41/63] val.py MultiBackend inference --- val.py | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) diff --git a/val.py b/val.py index d2797f1189ec..832a662ad3fe 100644 --- a/val.py +++ b/val.py @@ -23,10 +23,10 @@ 
sys.path.append(str(ROOT)) # add ROOT to PATH ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative -from models.experimental import attempt_load +from models.common import DetectMultiBackend from utils.callbacks import Callbacks from utils.datasets import create_dataloader -from utils.general import (LOGGER, box_iou, check_dataset, check_img_size, check_requirements, check_suffix, check_yaml, +from utils.general import (LOGGER, box_iou, check_dataset, check_img_size, check_requirements, check_yaml, coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args, scale_coords, xywh2xyxy, xyxy2xywh) from utils.metrics import ConfusionMatrix, ap_per_class @@ -100,6 +100,7 @@ def run(data, name='exp', # save to project/name exist_ok=False, # existing project/name ok, do not increment half=True, # use FP16 half-precision inference + dnn=False, # use OpenCV DNN for ONNX inference model=None, dataloader=None, save_dir=Path(''), @@ -120,10 +121,12 @@ def run(data, (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model - check_suffix(weights, '.pt') - model = attempt_load(weights, map_location=device) # load FP32 model - gs = max(int(model.stride.max()), 32) # grid size (max stride) - imgsz = check_img_size(imgsz, s=gs) # check image size + model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn) + stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx + imgsz = check_img_size(imgsz, s=model.stride) # check image size + if not model.pt: + LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends') + batch_size = 1 # export.py models default to batch-size 1 # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: @@ -146,10 +149,10 @@ def run(data, # Dataloader if not training: if device.type != 'cpu': - model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters()))) # run once + model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters()))) # run once pad = 0.0 if task == 'speed' else 0.5 task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images - dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=pad, rect=True, + dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=model.pt, prefix=colorstr(f'{task}: '))[0] seen = 0 @@ -171,7 +174,7 @@ def run(data, dt[0] += t2 - t1 # Run model - out, train_out = model(img, augment=augment) # inference and training outputs + out, train_out = model(img, augment=augment, val=True) # inference and training outputs dt[1] += time_sync() - t2 # Compute loss @@ -318,6 +321,7 @@ def parse_opt(): parser.add_argument('--name', default='exp', help='save to project/name') parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment') parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference') + parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference') opt = parser.parse_args() opt.data = check_yaml(opt.data) # check YAML opt.save_json |= opt.data.endswith('coco.yaml') From b5bae243af1303020fa64fbb7c53823e9d7c95ba Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 15:53:30 +0100 Subject: [PATCH 42/63] warmup fix --- detect.py | 2 +- val.py | 4 ++-- 2 
files changed, 3 insertions(+), 3 deletions(-) diff --git a/detect.py b/detect.py index a216ba2a59ca..a110dd5c63ad 100644 --- a/detect.py +++ b/detect.py @@ -96,7 +96,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) # Run inference if pt and device.type != 'cpu': - model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters()))) # run once + model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters()))) # warmup dt, seen = [0.0, 0.0, 0.0], 0 for path, im, im0s, vid_cap, s in dataset: t1 = time_sync() diff --git a/val.py b/val.py index 832a662ad3fe..f44e4ed573ff 100644 --- a/val.py +++ b/val.py @@ -148,8 +148,8 @@ def run(data, # Dataloader if not training: - if device.type != 'cpu': - model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters()))) # run once + if model.pt and device.type != 'cpu': + model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters()))) # warmup pad = 0.0 if task == 'speed' else 0.5 task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=model.pt, From 96f2b3c90fb952c7e80061bad4245b89dfa2d66e Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 15:57:51 +0100 Subject: [PATCH 43/63] to device fix --- val.py | 25 ++++++++++++++----------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/val.py b/val.py index f44e4ed573ff..01097fd67d21 100644 --- a/val.py +++ b/val.py @@ -163,18 +163,21 @@ def run(data, dt, p, r, f1, mp, mr, map50, map = [0.0, 0.0, 0.0], 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0 loss = torch.zeros(3, device=device) jdict, stats, ap, ap_class = [], [], [], [] - for batch_i, (img, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): + for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): t1 = time_sync() - img = img.to(device, non_blocking=True) - img = img.half() if half else img.float() # uint8 to fp16/32 - img /= 255 # 0 - 255 to 0.0 - 1.0 + if model.onnx: + im = np.array(im).astype('float32') + else: + im = im.to(device, non_blocking=True) + im = im.half() if half else im.float() # uint8 to fp16/32 + im /= 255 # 0 - 255 to 0.0 - 1.0 targets = targets.to(device) - nb, _, height, width = img.shape # batch size, channels, height, width + nb, _, height, width = im.shape # batch size, channels, height, width t2 = time_sync() dt[0] += t2 - t1 # Run model - out, train_out = model(img, augment=augment, val=True) # inference and training outputs + out, train_out = model(im, augment=augment, val=True) # inference and training outputs dt[1] += time_sync() - t2 # Compute loss @@ -205,12 +208,12 @@ def run(data, if single_cls: pred[:, 5] = 0 predn = pred.clone() - scale_coords(img[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred + scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1]) # native-space pred # Evaluate if nl: tbox = xywh2xyxy(labels[:, 1:5]) # target boxes - scale_coords(img[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels + scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1]) # native-space labels labelsn = torch.cat((labels[:, 0:1], tbox), 1) # native-space labels correct = process_batch(predn, labelsn, iouv) if plots: @@ -224,14 +227,14 @@ def run(data, save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt')) if save_json: save_one_json(predn, jdict, path, class_map) # 
append to COCO-JSON dictionary - callbacks.run('on_val_image_end', pred, predn, path, names, img[si]) + callbacks.run('on_val_image_end', pred, predn, path, names, im[si]) # Plot images if plots and batch_i < 3: f = save_dir / f'val_batch{batch_i}_labels.jpg' # labels - Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start() + Thread(target=plot_images, args=(im, targets, paths, f, names), daemon=True).start() f = save_dir / f'val_batch{batch_i}_pred.jpg' # predictions - Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start() + Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start() # Compute statistics stats = [np.concatenate(x, 0) for x in zip(*stats)] # to numpy From 32974f23f54437fc8f99b1415c9ccc77c4dd2e5d Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:00:01 +0100 Subject: [PATCH 44/63] pt fix --- val.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/val.py b/val.py index 01097fd67d21..13f9866273f8 100644 --- a/val.py +++ b/val.py @@ -111,7 +111,7 @@ def run(data, # Initialize/load model and set device training = model is not None if training: # called by train.py - device = next(model.parameters()).device # get model device + device, pt = next(model.parameters()).device, True # get model device, PyTorch model else: # called directly device = select_device(device, batch_size=batch_size) @@ -165,13 +165,11 @@ def run(data, jdict, stats, ap, ap_class = [], [], [], [] for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): t1 = time_sync() - if model.onnx: - im = np.array(im).astype('float32') - else: + if pt: im = im.to(device, non_blocking=True) im = im.half() if half else im.float() # uint8 to fp16/32 + targets = targets.to(device) im /= 255 # 0 - 255 to 0.0 - 1.0 - targets = targets.to(device) nb, _, height, width = im.shape # batch size, channels, height, width t2 = time_sync() dt[0] += t2 - t1 From d955ed658b2787308fa1818711ce6ba81bf893c8 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:11:15 +0100 Subject: [PATCH 45/63] device fix --- val.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/val.py b/val.py index 13f9866273f8..32712fbe558e 100644 --- a/val.py +++ b/val.py @@ -127,6 +127,7 @@ def run(data, if not model.pt: LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends') batch_size = 1 # export.py models default to batch-size 1 + device = torch.device('cpu') # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99 # if device.type != 'cpu' and torch.cuda.device_count() > 1: @@ -169,6 +170,8 @@ def run(data, im = im.to(device, non_blocking=True) im = im.half() if half else im.float() # uint8 to fp16/32 targets = targets.to(device) + else: + im = im.numpy().astype('float32') im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width t2 = time_sync() From 9c253596180bbe4aea9691f05151efc0306cea4d Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:18:15 +0100 Subject: [PATCH 46/63] Val cleanup --- val.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/val.py b/val.py index 32712fbe558e..993c0be0052a 100644 --- a/val.py +++ b/val.py @@ -137,7 +137,7 @@ def run(data, data = check_dataset(data) # check # Half - half &= device.type != 'cpu' # half precision only supported on CUDA + half &= pt and 
device.type != 'cpu' # half precision only supported on CUDA model.half() if half else model.float() # Configure @@ -149,7 +149,7 @@ def run(data, # Dataloader if not training: - if model.pt and device.type != 'cpu': + if pt and device.type != 'cpu': model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters()))) # warmup pad = 0.0 if task == 'speed' else 0.5 task = task if task in ('train', 'val', 'test') else 'val' # path to train/val/test images @@ -166,12 +166,12 @@ def run(data, jdict, stats, ap, ap_class = [], [], [], [] for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): t1 = time_sync() + im = im.half() if half else im.float() # uint8 to fp16/32 if pt: im = im.to(device, non_blocking=True) - im = im.half() if half else im.float() # uint8 to fp16/32 targets = targets.to(device) - else: - im = im.numpy().astype('float32') + # else: + # im = im.numpy().astype('float32') im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width t2 = time_sync() From e9cd5eb2323b133bc6db3ff4034fa7751d37c84f Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:21:47 +0100 Subject: [PATCH 47/63] COCO128 URL to assets --- data/coco128.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data/coco128.yaml b/data/coco128.yaml index b1dfb004afa1..84a91b18359d 100644 --- a/data/coco128.yaml +++ b/data/coco128.yaml @@ -27,4 +27,4 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't # Download script/URL (optional) -download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip +download: https://ultralytics.com/assets/coco128.zip From 54d3dfa38183b3642cd18077dad6e9104dd897f3 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:41:24 +0100 Subject: [PATCH 48/63] half fix --- detect.py | 8 +++----- models/common.py | 4 ++-- val.py | 6 ++---- 3 files changed, 7 insertions(+), 11 deletions(-) diff --git a/detect.py b/detect.py index a110dd5c63ad..34b2095a789b 100644 --- a/detect.py +++ b/detect.py @@ -79,7 +79,7 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) half &= device.type != 'cpu' # half precision only supported on CUDA # Load model - model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn) + model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn) stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx imgsz = check_img_size(imgsz, s=stride) # check image size @@ -100,11 +100,9 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) dt, seen = [0.0, 0.0, 0.0], 0 for path, im, im0s, vid_cap, s in dataset: t1 = time_sync() - if onnx: - im = im.astype('float32') - else: + if pt: im = torch.from_numpy(im).to(device) - im = im.half() if half else im.float() # uint8 to fp16/32 + im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 if len(im.shape) == 3: im = im[None] # expand for batch dim diff --git a/models/common.py b/models/common.py index a050521aa8e0..9fa83a091434 100644 --- a/models/common.py +++ b/models/common.py @@ -277,7 +277,7 @@ def forward(self, x): class DetectMultiBackend(nn.Module): # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN - def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=True): + def __init__(self, weights='yolov5s.pt', device=None, pt_half=False, dnn=True): super().__init__() w = str(weights[0] if isinstance(weights, list) 
else weights) suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', ''] @@ -289,7 +289,7 @@ def __init__(self, weights='yolov5s.pt', device=None, half=False, dnn=True): model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device) stride = int(model.stride.max()) # model stride names = model.module.names if hasattr(model, 'module') else model.names # get class names - if half: + if pt_half: model.half() # to FP16 elif dnn: # ONNX OpenCV DNN LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...') diff --git a/val.py b/val.py index 993c0be0052a..c5b8e1cd7544 100644 --- a/val.py +++ b/val.py @@ -121,7 +121,7 @@ def run(data, (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True) # make dir # Load model - model = DetectMultiBackend(weights, device=device, half=half, dnn=dnn) + model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn) stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx imgsz = check_img_size(imgsz, s=model.stride) # check image size if not model.pt: @@ -166,12 +166,10 @@ def run(data, jdict, stats, ap, ap_class = [], [], [], [] for batch_i, (im, targets, paths, shapes) in enumerate(tqdm(dataloader, desc=s)): t1 = time_sync() - im = im.half() if half else im.float() # uint8 to fp16/32 if pt: im = im.to(device, non_blocking=True) targets = targets.to(device) - # else: - # im = im.numpy().astype('float32') + im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 nb, _, height, width = im.shape # batch size, channels, height, width t2 = time_sync() From 0b07c0c4b539ee6cf10616e894434b69b096d7ff Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:47:58 +0100 Subject: [PATCH 49/63] detect fix --- detect.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/detect.py b/detect.py index 34b2095a789b..19102c77ce8d 100644 --- a/detect.py +++ b/detect.py @@ -102,7 +102,9 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) t1 = time_sync() if pt: im = torch.from_numpy(im).to(device) - im = im.half() if half else im.float() # uint8 to fp16/32 + im = im.half() if half else im.float() # uint8 to fp16/32 + else: + im = im.astype('float32') im /= 255 # 0 - 255 to 0.0 - 1.0 if len(im.shape) == 3: im = im[None] # expand for batch dim From 55eefc0850845503e5cacab6a28be46b7e4cbe02 Mon Sep 17 00:00:00 2001 From: Glenn Jocher Date: Sun, 7 Nov 2021 16:55:38 +0100 Subject: [PATCH 50/63] detect fix 2 --- detect.py | 7 ++----- models/common.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/detect.py b/detect.py index 19102c77ce8d..8cb8e469b20a 100644 --- a/detect.py +++ b/detect.py @@ -100,11 +100,8 @@ def run(weights=ROOT / 'yolov5s.pt', # model.pt path(s) dt, seen = [0.0, 0.0, 0.0], 0 for path, im, im0s, vid_cap, s in dataset: t1 = time_sync() - if pt: - im = torch.from_numpy(im).to(device) - im = im.half() if half else im.float() # uint8 to fp16/32 - else: - im = im.astype('float32') + im = torch.from_numpy(im).to(device) + im = im.half() if half else im.float() # uint8 to fp16/32 im /= 255 # 0 - 255 to 0.0 - 1.0 if len(im.shape) == 3: im = im[None] # expand for batch dim diff --git a/models/common.py b/models/common.py index 9fa83a091434..7032dc3be296 100644 --- a/models/common.py +++ b/models/common.py @@ -337,7 +337,7 @@ def forward(self, im, augment=False, visualize=False, val=False): y = self.model(im, augment=augment, visualize=visualize) return y if val else y[0] elif 
From 17676ae64c25152885e5c00a98877366b9901473 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:05:19 +0100
Subject: [PATCH 51/63] remove half from DetectMultiBackend

---
 detect.py        | 11 ++++++-----
 models/common.py |  4 +---
 val.py           |  4 ++--
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/detect.py b/detect.py
index 8cb8e469b20a..865104c32504 100644
--- a/detect.py
+++ b/detect.py
@@ -74,15 +74,16 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

-    # Initialize
-    device = select_device(device)
-    half &= device.type != 'cpu'  # half precision only supported on CUDA
-
     # Load model
-    model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn)
+    device = select_device(device)
+    model = DetectMultiBackend(weights, device=device, dnn=dnn)
     stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
     imgsz = check_img_size(imgsz, s=stride)  # check image size

+    # Half
+    half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+    model.half() if half else model.float()
+
     # Dataloader
     if webcam:
         view_img = check_imshow()

diff --git a/models/common.py b/models/common.py
index 7032dc3be296..ee2fa6f89988 100644
--- a/models/common.py
+++ b/models/common.py
@@ -277,7 +277,7 @@ def forward(self, x):

 class DetectMultiBackend(nn.Module):
     # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN
-    def __init__(self, weights='yolov5s.pt', device=None, pt_half=False, dnn=True):
+    def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
@@ -289,8 +289,6 @@ def __init__(self, weights='yolov5s.pt', device=None, pt_half=False, dnn=True):
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-            if pt_half:
-                model.half()  # to FP16
         elif dnn:  # ONNX OpenCV DNN
             LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
             check_requirements(('opencv-python>=4.5.4',))

diff --git a/val.py b/val.py
index c5b8e1cd7544..e62918596b11 100644
--- a/val.py
+++ b/val.py
@@ -121,10 +121,10 @@ def run(data,
         (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

         # Load model
-        model = DetectMultiBackend(weights, device=device, pt_half=half, dnn=dnn)
+        model = DetectMultiBackend(weights, device=device, dnn=dnn)
         stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
         imgsz = check_img_size(imgsz, s=model.stride)  # check image size
-        if not model.pt:
+        if not pt:
             LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
             batch_size = 1  # export.py models default to batch-size 1
             device = torch.device('cpu')

From 3985a5928d4b26b5e2f16d19a3a6db31d6c56c60 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:12:11 +0100
Subject: [PATCH 52/63] training half handling

---
 val.py | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/val.py b/val.py
index e62918596b11..e53f59635cbb 100644
--- a/val.py
+++ b/val.py
@@ -113,6 +113,8 @@ def run(data,
     if training:  # called by train.py
         device, pt = next(model.parameters()).device, True  # get model device, PyTorch model

+        half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+        model.half() if half else model.float()
     else:  # called directly
         device = select_device(device, batch_size=batch_size)

@@ -129,17 +131,12 @@ def run(data,
             batch_size = 1  # export.py models default to batch-size 1
             device = torch.device('cpu')

-        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
-        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
-        #     model = nn.DataParallel(model)
+        half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+        model.model.half() if half else model.model.float()

     # Data
     data = check_dataset(data)  # check

-    # Half
-    half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
-    model.half() if half else model.float()
-
     # Configure
     model.eval()
     is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt')  # COCO dataset
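A note on patch 52: val.py now converts precision at both entry points, and the object differs between them, so the attribute does too. Condensed from the diff above:

    if training:  # model is the bare nn.Module handed over by train.py
        model.half() if half else model.float()
    else:         # model is a DetectMultiBackend wrapper; its network lives in .model
        model.model.half() if half else model.model.float()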
From 9844c8177b7c75d8c3fd26714449fb54b0be06a2 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:15:20 +0100
Subject: [PATCH 53/63] training half handling 2

---
 val.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/val.py b/val.py
index e53f59635cbb..83a7e62c18e6 100644
--- a/val.py
+++ b/val.py
@@ -113,7 +113,7 @@ def run(data,
     if training:  # called by train.py
         device, pt = next(model.parameters()).device, True  # get model device, PyTorch model

-        half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+        half &= device.type != 'cpu'  # half precision only supported on CUDA
         model.half() if half else model.float()
     else:  # called directly
         device = select_device(device, batch_size=batch_size)
@@ -126,14 +126,14 @@ def run(data,
         model = DetectMultiBackend(weights, device=device, dnn=dnn)
         stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
         imgsz = check_img_size(imgsz, s=model.stride)  # check image size
-        if not pt:
+        if pt:
+            half &= device.type != 'cpu'  # half precision only supported on CUDA
+            model.model.half() if half else model.model.float()
+        else:
             LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
             batch_size = 1  # export.py models default to batch-size 1
             device = torch.device('cpu')

-        half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
-        model.model.half() if half else model.model.float()
-
     # Data
     data = check_dataset(data)  # check

From fe94f4bb7a781f1424487527b78e8d55bcc7b087 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:23:34 +0100
Subject: [PATCH 54/63] training half handling 3

---
 detect.py | 3 ++-
 val.py    | 5 +++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/detect.py b/detect.py
index 865104c32504..bee873405b2d 100644
--- a/detect.py
+++ b/detect.py
@@ -82,7 +82,8 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)

     # Half
     half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
-    model.half() if half else model.float()
+    if pt:
+        model.model.half() if half else model.model.float()

diff --git a/val.py b/val.py
index 83a7e62c18e6..a24df1bae062 100644
--- a/val.py
+++ b/val.py
@@ -126,13 +126,14 @@ def run(data,
         model = DetectMultiBackend(weights, device=device, dnn=dnn)
         stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
         imgsz = check_img_size(imgsz, s=model.stride)  # check image size
+        half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
         if pt:
-            half &= device.type != 'cpu'  # half precision only supported on CUDA
             model.model.half() if half else model.model.float()
         else:
-            LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
+            half = False
             batch_size = 1  # export.py models default to batch-size 1
             device = torch.device('cpu')
+            LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
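A note on patches 53-54: the standalone branch now decides everything from pt up front. The resulting val.py logic, reconstructed from the diffs above for orientation only:

    half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
    if pt:
        model.model.half() if half else model.model.float()
    else:  # non-PyTorch backends: force FP32, batch size 1, CPU
        half = False
        batch_size = 1
        device = torch.device('cpu')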
From 28de24694adbd567869429d0d9bcc9dcc9f8fd46 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:29:51 +0100
Subject: [PATCH 55/63] Cleanup

---
 detect.py | 2 +-
 val.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/detect.py b/detect.py
index bee873405b2d..2da09edc78ae 100644
--- a/detect.py
+++ b/detect.py
@@ -81,7 +81,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     imgsz = check_img_size(imgsz, s=stride)  # check image size

     # Half
-    half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
     if pt:
         model.model.half() if half else model.model.float()

diff --git a/val.py b/val.py
index a24df1bae062..08686cf0b4d9 100644
--- a/val.py
+++ b/val.py
@@ -126,7 +126,7 @@ def run(data,
         model = DetectMultiBackend(weights, device=device, dnn=dnn)
         stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
         imgsz = check_img_size(imgsz, s=model.stride)  # check image size
-        half &= pt and device.type != 'cpu'  # half precision only supported on CUDA
+        half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
         if pt:
             model.model.half() if half else model.model.float()
         else:

From 709d9ce7a3cfbab8024e8574591d388897be8409 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 17:35:33 +0100
Subject: [PATCH 56/63] Fix CI error

---
 val.py | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/val.py b/val.py
index 08686cf0b4d9..baefa79bbfbe 100644
--- a/val.py
+++ b/val.py
@@ -173,22 +173,22 @@ def run(data,
         t2 = time_sync()
         dt[0] += t2 - t1

-        # Run model
-        out, train_out = model(im, augment=augment, val=True)  # inference and training outputs
+        # Inference
+        out, train_out = model(im) if training else model(im, augment=augment, val=True)  # inference, loss outputs
         dt[1] += time_sync() - t2

-        # Compute loss
+        # Loss
         if compute_loss:
             loss += compute_loss([x.float() for x in train_out], targets)[1]  # box, obj, cls

-        # Run NMS
+        # NMS
         targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
         lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
         t3 = time_sync()
         out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
         dt[2] += time_sync() - t3

-        # Statistics per image
+        # Metrics
         for si, pred in enumerate(out):
             labels = targets[targets[:, 0] == si, 1:]
             nl = len(labels)
@@ -233,7 +233,7 @@ def run(data,
             f = save_dir / f'val_batch{batch_i}_pred.jpg'  # predictions
             Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start()

-    # Compute statistics
+    # Compute metrics
     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
     if len(stats) and stats[0].any():
         p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
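A note on patch 56: the CI fix hinges on the two callables having different forward signatures. In eval mode a bare DetectionModel already returns (inference, loss) outputs directly, while DetectMultiBackend only returns that tuple when asked via val=True, hence the dispatch:

    out, train_out = model(im) if training else model(im, augment=augment, val=True)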
From 19bdb6e5c5c9da999ab0dd6dd81943b3951190ea Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Sun, 7 Nov 2021 23:43:01 +0100
Subject: [PATCH 57/63] Add torchscript _extra_files

---
 export.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/export.py b/export.py
index f5eb487045b0..74fa67c99e32 100644
--- a/export.py
+++ b/export.py
@@ -21,6 +21,7 @@
 """

 import argparse
+import json
 import os
 import subprocess
 import sys
@@ -54,7 +55,9 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:'

         f = file.with_suffix('.torchscript.pt')
         ts = torch.jit.trace(model, im, strict=False)
-        (optimize_for_mobile(ts) if optimize else ts).save(f)
+        dict = {"im_shape": im.shape, "stride": int(max(model.stride)), "device": next(model.parameters()).device.type}
+        extra_files = {'config.txt': json.dumps(dict)}  # torch._C.ExtraFilesMap()
+        (optimize_for_mobile(ts) if optimize else ts).save(f, _extra_files=extra_files)

         LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
     except Exception as e:
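A note on patch 57: _extra_files rides metadata inside the TorchScript archive itself. A self-contained round-trip sketch (toy module and filename, not from the repo):

    import json
    import torch

    ts = torch.jit.trace(torch.nn.Linear(1, 1), torch.zeros(1, 1))
    ts.save('toy.torchscript.pt', _extra_files={'config.txt': json.dumps({'stride': 32})})

    files = {'config.txt': ''}  # torch.jit.load fills the values in place
    torch.jit.load('toy.torchscript.pt', _extra_files=files)
    assert json.loads(files['config.txt'])['stride'] == 32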
From 358d9e3c75809e1c7e04e07d093a7ce9dc3c1920 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 8 Nov 2021 00:03:41 +0100
Subject: [PATCH 58/63] Add TorchScript

---
 detect.py        |  6 +++---
 export.py        |  4 ++--
 models/common.py | 14 ++++++++++++--
 3 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/detect.py b/detect.py
index 2da09edc78ae..108f8f138052 100644
--- a/detect.py
+++ b/detect.py
@@ -77,7 +77,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     # Load model
     device = select_device(device)
     model = DetectMultiBackend(weights, device=device, dnn=dnn)
-    stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
+    stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx
     imgsz = check_img_size(imgsz, s=stride)  # check image size

     # Half
@@ -89,10 +89,10 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
     if webcam:
         view_img = check_imshow()
         cudnn.benchmark = True  # set True to speed up constant image size inference
-        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)
         bs = len(dataset)  # batch_size
     else:
-        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit)
         bs = 1  # batch_size
     vid_path, vid_writer = [None] * bs, [None] * bs

diff --git a/export.py b/export.py
index 74fa67c99e32..4cf30e34fc7b 100644
--- a/export.py
+++ b/export.py
@@ -55,8 +55,8 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:'

         f = file.with_suffix('.torchscript.pt')
         ts = torch.jit.trace(model, im, strict=False)
-        dict = {"im_shape": im.shape, "stride": int(max(model.stride)), "device": next(model.parameters()).device.type}
-        extra_files = {'config.txt': json.dumps(dict)}  # torch._C.ExtraFilesMap()
+        d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
+        extra_files = {'config.txt': json.dumps(d)}  # torch._C.ExtraFilesMap()
         (optimize_for_mobile(ts) if optimize else ts).save(f, _extra_files=extra_files)

         LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')

diff --git a/models/common.py b/models/common.py
index ee2fa6f89988..2acb17318996 100644
--- a/models/common.py
+++ b/models/common.py
@@ -3,6 +3,7 @@
 Common modules
 """

+import json
 import logging
 import math
 import platform
@@ -283,8 +284,17 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
         pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
+        jit = pt and 'torchscript' in w.lower()
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
-        if pt:  # PyTorch
+
+        if jit:  # TorchScript
+            LOGGER.info(f'Loading {w} for TorchScript inference...')
+            extra_files = {'config.txt': ''}  # model metadata
+            model = torch.jit.load(w, _extra_files=extra_files)
+            if extra_files['config.txt']:
+                d = json.loads(extra_files['config.txt'])  # extra_files dict
+                stride, names = int(d['stride']), d['names']
+        elif pt:  # PyTorch
             from models.experimental import attempt_load  # scoped to avoid circular import
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
@@ -332,7 +342,7 @@ def wrap_frozen_graph(gd, inputs, outputs):

     def forward(self, im, augment=False, visualize=False, val=False):
         # YOLOv5 MultiBackend inference
         if self.pt:  # PyTorch
-            y = self.model(im, augment=augment, visualize=visualize)
+            y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
         elif self.onnx:  # ONNX
             im = im.cpu().numpy()  # torch to numpy
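A note on patch 58: a traced TorchScript graph is fixed-shape, so auto=pt and not jit disables rectangular letterboxing and pads every image to the full square imgsz. The per-side stride rounding that auto=True would otherwise apply, sketched:

    import math

    def stride_multiple(x, s=32):  # what stride-constrained letterboxing enforces per side
        return math.ceil(x / s) * s

    print(stride_multiple(633))  # 640; with auto=False the shape is fixed at trace time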
From dc0b748e0ef23a681c5305e3bff54fb561c11d1e Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 8 Nov 2021 00:13:36 +0100
Subject: [PATCH 59/63] Add CoreML

---
 models/common.py | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/models/common.py b/models/common.py
index 2acb17318996..9982a8d85c22 100644
--- a/models/common.py
+++ b/models/common.py
@@ -22,7 +22,7 @@

 from utils.datasets import exif_transpose, letterbox
 from utils.general import (check_requirements, check_suffix, colorstr, increment_path, make_divisible,
-                           non_max_suppression, scale_coords, xyxy2xywh)
+                           non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
 from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import time_sync

@@ -281,9 +281,9 @@ class DetectMultiBackend(nn.Module):
     def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
-        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
+        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '', '.mlmodel']
         check_suffix(w, suffixes)  # check weights have acceptable suffix
-        pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
+        pt, onnx, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
         jit = pt and 'torchscript' in w.lower()
         stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults

@@ -299,6 +299,9 @@ def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
             model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
             stride = int(model.stride.max())  # model stride
             names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+        elif coreml:  # CoreML *.mlmodel
+            import coremltools as ct
+            model = ct.models.MLModel(w)
         elif dnn:  # ONNX OpenCV DNN
             LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
             check_requirements(('opencv-python>=4.5.4',))
@@ -341,9 +344,15 @@ def wrap_frozen_graph(gd, inputs, outputs):

     def forward(self, im, augment=False, visualize=False, val=False):
         # YOLOv5 MultiBackend inference
+        b, ch, h, w = im.shape  # batch, channel, height, width
         if self.pt:  # PyTorch
             y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
+        elif self.coreml:  # CoreML *.mlmodel
+            y = self.model.predict({'image': im})  # coordinates are xywh normalized
+            box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
+            conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
+            y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
         elif self.onnx:  # ONNX
             im = im.cpu().numpy()  # torch to numpy
             if self.dnn:  # ONNX OpenCV DNN
@@ -352,7 +361,7 @@ def forward(self, im, augment=False, visualize=False, val=False):
             else:  # ONNX Runtime
                 y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
-            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,640,640,3)
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,h=640,w=640,3)
             if self.pb:
                 y = self.frozen_func(x=self.tf.constant(im)).numpy()
             elif self.saved_model:
@@ -369,10 +378,10 @@ def forward(self, im, augment=False, visualize=False, val=False):
                 if int8:
                     scale, zero_point = output['quantization']
                     y = (y.astype(np.float32) - zero_point) * scale  # re-scale
-            y[..., 0] *= im.shape[2]  # x
-            y[..., 1] *= im.shape[1]  # y
-            y[..., 2] *= im.shape[2]  # w
-            y[..., 3] *= im.shape[1]  # h
+            y[..., 0] *= w  # x
+            y[..., 1] *= h  # y
+            y[..., 2] *= w  # w
+            y[..., 3] *= h  # h
             y = torch.tensor(y)
         return (y, []) if val else y

From 0bfaba5d87dd2c479c176403279204e4e33a93a5 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Mon, 8 Nov 2021 00:39:58 +0100
Subject: [PATCH 60/63] CoreML cleanup

---
 detect.py        | 2 +-
 models/common.py | 5 ++++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/detect.py b/detect.py
index 108f8f138052..cb3c4abd6425 100644
--- a/detect.py
+++ b/detect.py
@@ -203,7 +203,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)

 def parse_opt():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.mlmodel', help='model path(s)')
     parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
     parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
     parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')

diff --git a/models/common.py b/models/common.py
index 9982a8d85c22..6fa7bdb33622 100644
--- a/models/common.py
+++ b/models/common.py
@@ -349,6 +349,9 @@ def forward(self, im, augment=False, visualize=False, val=False):
             y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
             return y if val else y[0]
         elif self.coreml:  # CoreML *.mlmodel
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
+            im = Image.fromarray((im[0] * 255).astype('uint8'))
+            # im = im.resize((192, 320), Image.ANTIALIAS)
             y = self.model.predict({'image': im})  # coordinates are xywh normalized
             box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
             conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
@@ -361,7 +364,7 @@ def forward(self, im, augment=False, visualize=False, val=False):
             else:  # ONNX Runtime
                 y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
         else:  # TensorFlow model (TFLite, pb, saved_model)
-            im = im.permute(0, 2, 3, 1).cpu().numpy()  # TF format (1,h=640,w=640,3)
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
             if self.pb:
                 y = self.frozen_func(x=self.tf.constant(im)).numpy()
             elif self.saved_model:
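A note on patches 59-60: CoreML returns xywh boxes normalized to [0, 1], so they are scaled by [[w, h, w, h]] and converted to corner format. A worked sketch of that conversion (mirrors utils.general.xywh2xyxy):

    import numpy as np

    def xywh2xyxy(x):  # center x, center y, width, height -> x1, y1, x2, y2
        y = np.copy(x)
        y[:, 0] = x[:, 0] - x[:, 2] / 2
        y[:, 1] = x[:, 1] - x[:, 3] / 2
        y[:, 2] = x[:, 0] + x[:, 2] / 2
        y[:, 3] = x[:, 1] + x[:, 3] / 2
        return y

    h = w = 640
    print(xywh2xyxy(np.array([[0.5, 0.5, 0.2, 0.4]]) * [[w, h, w, h]]))
    # -> [[256. 192. 384. 448.]]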
From cd92e01d0eaf49a2e69aeca72a5b6761265263d2 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Tue, 9 Nov 2021 10:56:44 +0100
Subject: [PATCH 61/63] revert default to pt

---
 detect.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/detect.py b/detect.py
index cb3c4abd6425..108f8f138052 100644
--- a/detect.py
+++ b/detect.py
@@ -203,7 +203,7 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)

 def parse_opt():
     parser = argparse.ArgumentParser()
-    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.mlmodel', help='model path(s)')
+    parser.add_argument('--weights', nargs='+', type=str, default=ROOT / 'yolov5s.pt', help='model path(s)')
     parser.add_argument('--source', type=str, default=ROOT / 'data/images', help='file/dir/URL/glob, 0 for webcam')
     parser.add_argument('--imgsz', '--img', '--img-size', nargs='+', type=int, default=[640], help='inference size h,w')
     parser.add_argument('--conf-thres', type=float, default=0.25, help='confidence threshold')

From ffa76ee5b4f6ab82ec4975d1aaf37fb31dc117f9 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Tue, 9 Nov 2021 11:06:12 +0100
Subject: [PATCH 62/63] Add Usage examples

---
 models/common.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/models/common.py b/models/common.py
index 0a68c6270f8e..3ea7ba5477a6 100644
--- a/models/common.py
+++ b/models/common.py
@@ -274,8 +274,17 @@ def forward(self, x):

 class DetectMultiBackend(nn.Module):
-    # YOLOv5 MultiBackend class for PyTorch, TorchScript, TensorFlow, TFLite, ONNX, OpenCV DNN
+    # YOLOv5 MultiBackend class for python inference on various backends
     def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
+        # Usage:
+        #   PyTorch:          weights = *.pt
+        #   TorchScript:                *.torchscript.pt
+        #   CoreML:                     *.mlmodel
+        #   TensorFlow:                 *_saved_model
+        #   TensorFlow:                 *.pb
+        #   TensorFlow Lite:            *.tflite
+        #   ONNX Runtime:               *.onnx
+        #   OpenCV DNN:                 *.onnx with dnn=True
         super().__init__()
         w = str(weights[0] if isinstance(weights, list) else weights)
         suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '', '.mlmodel']
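A note on patch 62: with the usage table in place, backend selection is driven purely by the weights suffix. A hedged usage sketch (the weights files are assumed to exist locally):

    from models.common import DetectMultiBackend
    from utils.torch_utils import select_device

    device = select_device('')  # CUDA device 0 if available, else CPU
    model = DetectMultiBackend('yolov5s.pt', device=device)         # PyTorch
    # model = DetectMultiBackend('yolov5s.onnx', device=device)     # ONNX (OpenCV DNN with dnn=True)
    # model = DetectMultiBackend('yolov5s.mlmodel', device=device)  # CoreML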
From 0f98f01686e8a732ee168bcec67c622c2f542f46 Mon Sep 17 00:00:00 2001
From: Glenn Jocher
Date: Tue, 9 Nov 2021 16:14:41 +0100
Subject: [PATCH 63/63] Cleanup val

---
 val.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/val.py b/val.py
index baefa79bbfbe..2bcbc582a500 100644
--- a/val.py
+++ b/val.py
@@ -124,8 +124,8 @@ def run(data,

         # Load model
         model = DetectMultiBackend(weights, device=device, dnn=dnn)
-        stride, names, pt, onnx = model.stride, model.names, model.pt, model.onnx
-        imgsz = check_img_size(imgsz, s=model.stride)  # check image size
+        stride, pt = model.stride, model.pt
+        imgsz = check_img_size(imgsz, s=stride)  # check image size
         half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
         if pt:
             model.model.half() if half else model.model.float()
@@ -151,7 +151,7 @@ def run(data,
             model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
         pad = 0.0 if task == 'speed' else 0.5
         task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
-        dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=model.pt,
+        dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=pt,
                                        prefix=colorstr(f'{task}: '))[0]

     seen = 0