diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
index a20f15c20c93..fcb64138b088 100644
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -41,8 +41,8 @@ body:
     attributes:
       label: Bug
       description: Provide console output with error messages and/or screenshots of the bug.
-      placeholder: >
-        TIP: Include as much information as possible (screenshots, logs, tracebacks etc.) to receive the most helpful response.
+      placeholder: |
+        💡 ProTip! Include as much information as possible (screenshots, logs, tracebacks etc.) to receive the most helpful response.
    validations:
      required: true
@@ -51,7 +51,7 @@ body:
    attributes:
      label: Environment
      description: Please specify the software and hardware you used to produce the bug.
      placeholder: |
-        - YOLO: YOLOv5 🚀 v6.0-37-g620b535 torch 1.9.0+cu111 CUDA:0 (Tesla V100-SXM2-16GB, 16160.5MB)
+        - YOLO: YOLOv5 🚀 v6.0-67-g60e42e1 torch 1.9.0+cu111 CUDA:0 (A100-SXM4-40GB, 40536MiB)
        - OS: Ubuntu 20.04
        - Python: 3.9.0
    validations:
      required: true
@@ -64,7 +64,9 @@ body:
        When asking a question, people will be better able to provide help if you provide code that they can easily understand and use to **reproduce** the problem.
        This is referred to by community members as creating a [minimal reproducible example](https://stackoverflow.com/help/minimal-reproducible-example).
      placeholder: |
-        # code to reproduce your issue here
+        ```
+        # Code to reproduce your issue here
+        ```
    validations:
      required: false
diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml
index 9ae5dd57c608..8e0993c68bab 100644
--- a/.github/ISSUE_TEMPLATE/question.yml
+++ b/.github/ISSUE_TEMPLATE/question.yml
@@ -22,8 +22,8 @@ body:
    attributes:
      label: Question
      description: What is your question?
-      placeholder: >
-        TIP: Include as much information as possible (screenshots, links, reference etc.) to receive the most helpful response.
+      placeholder: |
+        💡 ProTip! Include as much information as possible (screenshots, logs, tracebacks etc.) to receive the most helpful response.
    validations:
      required: true
diff --git a/.github/workflows/code-format.yml b/.github/workflows/code-format.yml
deleted file mode 100644
index 6ebc6cc01c0b..000000000000
--- a/.github/workflows/code-format.yml
+++ /dev/null
@@ -1,47 +0,0 @@
-# Run code formatting GitHub Action, can be replaced by this bot: https://github.com/marketplace/pre-commit-ci
-
-name: Code formatting
-
-on:  # https://help.github.com/en/actions/reference/events-that-trigger-workflows
-  push:
-    branches: [master]
-  pull_request: {}
-
-jobs:
-  pep8-check-flake8:
-    runs-on: ubuntu-20.04
-    steps:
-      - uses: actions/checkout@master
-      - uses: actions/setup-python@v2
-        with:
-          python-version: 3.7
-      - name: Install dependencies
-        run: |
-          pip install flake8
-          pip list
-        shell: bash
-      - name: PEP8
-        run: |
-          flake8 .
-
-  pre-commit-check:
-    runs-on: ubuntu-20.04
-    steps:
-      - uses: actions/checkout@v2
-        # for private repo - first is the checkout step, which needs to use unlimited fetch depth for pushing
-        with:
-          fetch-depth: 0
-      - uses: actions/setup-python@v2
-
-      - name: set PY
-        run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> $GITHUB_ENV
-      - uses: actions/cache@v2
-        with:
-          path: ~/.cache/pre-commit
-          key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
-
-      - uses: pre-commit/action@v2.0.3
-        # this action also provides an additional behaviour when used in private repositories
-        # when configured with a github token, the action will push back fixes to the pull request branch
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2eb78aa17ef4..48e752f448f1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,12 +30,11 @@ repos:
        args: [--py36-plus]
        name: Upgrade code

-  # TODO
-  #- repo: https://github.com/PyCQA/isort
-  #  rev: 5.9.3
-  #  hooks:
-  #    - id: isort
-  #      name: imports
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.9.3
+    hooks:
+      - id: isort
+        name: Sort imports

  # TODO
  #- repo: https://github.com/pre-commit/mirrors-yapf
diff --git a/Dockerfile b/Dockerfile
index 0ee89b432b8f..fe1acb0a6540 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,7 @@
 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license

 # Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
-FROM nvcr.io/nvidia/pytorch:21.05-py3
+FROM nvcr.io/nvidia/pytorch:21.10-py3

 # Install linux packages
 RUN apt update && apt install -y zip htop screen libgl1-mesa-glx
@@ -11,8 +11,8 @@ COPY requirements.txt .
 RUN python -m pip install --upgrade pip
 RUN pip uninstall -y nvidia-tensorboard nvidia-tensorboard-plugin-dlprof
 RUN pip install --no-cache -r requirements.txt coremltools onnx gsutil notebook wandb>=0.12.2
-RUN pip install --no-cache -U torch torchvision numpy
-# RUN pip install --no-cache torch==1.9.1+cu111 torchvision==0.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html
+RUN pip install --no-cache -U torch torchvision numpy Pillow
+# RUN pip install --no-cache torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

 # Create working directory
 RUN mkdir -p /usr/src/app
diff --git a/README.md b/README.md
index 3e2f5b656cde..6e72d85da7ee 100644
--- a/README.md
+++ b/README.md
@@ -109,11 +109,11 @@ the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases) and
 ```bash
 $ python detect.py --source 0  # webcam
-                            file.jpg  # image
-                            file.mp4  # video
+                            img.jpg  # image
+                            vid.mp4  # video
                             path/  # directory
                             path/*.jpg  # glob
-                            'https://youtu.be/NUsoVlDFqZg'  # YouTube
+                            'https://youtu.be/Zgi9g1ksQHc'  # YouTube
                             'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
 ```
diff --git a/data/coco128.yaml b/data/coco128.yaml
index b1dfb004afa1..84a91b18359d 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -27,4 +27,4 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't

 # Download script/URL (optional)
-download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
+download: https://ultralytics.com/assets/coco128.zip
diff --git a/detect.py b/detect.py
index f9c7bac3fca2..108f8f138052 100644
--- a/detect.py
+++ b/detect.py
@@ -3,7 +3,13 @@
 Run inference on images, videos, directories, streams, etc.

 Usage:
-    $ python path/to/detect.py --source path/to/img.jpg --weights yolov5s.pt --img 640
+    $ python path/to/detect.py --weights yolov5s.pt --source 0  # webcam
+                                                             img.jpg  # image
+                                                             vid.mp4  # video
+                                                             path/  # directory
+                                                             path/*.jpg  # glob
+                                                             'https://youtu.be/Zgi9g1ksQHc'  # YouTube
+                                                             'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
 """

 import argparse
@@ -12,7 +18,6 @@
 from pathlib import Path

 import cv2
-import numpy as np
 import torch
 import torch.backends.cudnn as cudnn

@@ -22,12 +27,12 @@
 sys.path.append(str(ROOT))  # add ROOT to PATH
 ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

-from models.experimental import attempt_load
-from utils.datasets import LoadImages, LoadStreams
-from utils.general import apply_classifier, check_img_size, check_imshow, check_requirements, check_suffix, colorstr, \
-    increment_path, non_max_suppression, print_args, save_one_box, scale_coords, strip_optimizer, xyxy2xywh, LOGGER
-from utils.plots import Annotator, colors
-from utils.torch_utils import load_classifier, select_device, time_sync
+from models.common import DetectMultiBackend
+from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
+from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr,
+                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.plots import Annotator, colors, save_one_box
+from utils.torch_utils import select_device, time_sync


 @torch.no_grad()
@@ -59,120 +64,55 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
         ):
     source = str(source)
     save_img = not nosave and not source.endswith('.txt')  # save inference images
-    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
-        ('rtsp://', 'rtmp://', 'http://', 'https://'))
+    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
+    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
+    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
+    if is_url and is_file:
+        source = check_file(source)  # download

     # Directories
     save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

-    # Initialize
+    # Load model
     device = select_device(device)
-    half &= device.type != 'cpu'  # half precision only supported on CUDA
+    model = DetectMultiBackend(weights, device=device, dnn=dnn)
+    stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx
+    imgsz = check_img_size(imgsz, s=stride)  # check image size

-    # Load model
-    w = str(weights[0] if isinstance(weights, list) else weights)
-    classify, suffix, suffixes = False, Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
-    check_suffix(w, suffixes)  # check weights have acceptable suffix
-    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
-    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
+    # Half
+    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
     if pt:
-        model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
-        stride = int(model.stride.max())  # model stride
-        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-        if half:
-            model.half()  # to FP16
-        if classify:  # second-stage classifier
-            modelc = load_classifier(name='resnet50', n=2)  # initialize
-            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()
-    elif onnx:
-        if dnn:
-            check_requirements(('opencv-python>=4.5.4',))
-            net = cv2.dnn.readNetFromONNX(w)
-        else:
-            check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
-            import onnxruntime
-            session = onnxruntime.InferenceSession(w, None)
-    else:  # TensorFlow models
-        import tensorflow as tf
-        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
-            def wrap_frozen_graph(gd, inputs, outputs):
-                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped import
-                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
-                               tf.nest.map_structure(x.graph.as_graph_element, outputs))
-
-            graph_def = tf.Graph().as_graph_def()
-            graph_def.ParseFromString(open(w, 'rb').read())
-            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
-        elif saved_model:
-            model = tf.keras.models.load_model(w)
-        elif tflite:
-            interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
-            interpreter.allocate_tensors()  # allocate
-            input_details = interpreter.get_input_details()  # inputs
-            output_details = interpreter.get_output_details()  # outputs
-            int8 = input_details[0]['dtype'] == np.uint8  # is TFLite quantized uint8 model
-    imgsz = check_img_size(imgsz, s=stride)  # check image size
+        model.model.half() if half else model.model.float()

     # Dataloader
     if webcam:
         view_img = check_imshow()
         cudnn.benchmark = True  # set True to speed up constant image size inference
-        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)
         bs = len(dataset)  # batch_size
     else:
-        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit)
         bs = 1  # batch_size
     vid_path, vid_writer = [None] * bs, [None] * bs

     # Run inference
     if pt and device.type != 'cpu':
-        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
+        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
     dt, seen = [0.0, 0.0, 0.0], 0
-    for path, img, im0s, vid_cap, s in dataset:
+    for path, im, im0s, vid_cap, s in dataset:
         t1 = time_sync()
-        if onnx:
-            img = img.astype('float32')
-        else:
-            img = torch.from_numpy(img).to(device)
-            img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-        if len(img.shape) == 3:
-            img = img[None]  # expand for batch dim
+        im = torch.from_numpy(im).to(device)
+        im = im.half() if half else im.float()  # uint8 to fp16/32
+        im /= 255  # 0 - 255 to 0.0 - 1.0
+        if len(im.shape) == 3:
+            im = im[None]  # expand for batch dim
         t2 = time_sync()
         dt[0] += t2 - t1

         # Inference
-        if pt:
-            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
-            pred = model(img, augment=augment, visualize=visualize)[0]
-        elif onnx:
-            if dnn:
-                net.setInput(img)
-                pred = torch.tensor(net.forward())
-            else:
-                pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
-        else:  # tensorflow model (tflite, pb, saved_model)
-            imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
-            if pb:
-                pred = frozen_func(x=tf.constant(imn)).numpy()
-            elif saved_model:
-                pred = model(imn, training=False).numpy()
-            elif tflite:
-                if int8:
-                    scale, zero_point = input_details[0]['quantization']
-                    imn = (imn / scale + zero_point).astype(np.uint8)  # de-scale
-                interpreter.set_tensor(input_details[0]['index'], imn)
-                interpreter.invoke()
-                pred = interpreter.get_tensor(output_details[0]['index'])
-                if int8:
-                    scale, zero_point = output_details[0]['quantization']
-                    pred = (pred.astype(np.float32) - zero_point) * scale  # re-scale
-            pred[..., 0] *= imgsz[1]  # x
-            pred[..., 1] *= imgsz[0]  # y
-            pred[..., 2] *= imgsz[1]  # w
-            pred[..., 3] *= imgsz[0]  # h
-            pred = torch.tensor(pred)
+        visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
+        pred = model(im, augment=augment, visualize=visualize)
         t3 = time_sync()
         dt[1] += t3 - t2

@@ -181,8 +121,7 @@ def wrap_frozen_graph(gd, inputs, outputs):
         dt[2] += time_sync() - t3

         # Second-stage classifier (optional)
-        if classify:
-            pred = apply_classifier(pred, modelc, img, im0s)
+        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

         # Process predictions
         for i, det in enumerate(pred):  # per image
@@ -194,15 +133,15 @@ def wrap_frozen_graph(gd, inputs, outputs):
                 p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

             p = Path(p)  # to Path
-            save_path = str(save_dir / p.name)  # img.jpg
-            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
-            s += '%gx%g ' % img.shape[2:]  # print string
+            save_path = str(save_dir / p.name)  # im.jpg
+            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
+            s += '%gx%g ' % im.shape[2:]  # print string
             gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
             imc = im0.copy() if save_crop else im0  # for save_crop
             annotator = Annotator(im0, line_width=line_thickness, example=str(names))
             if len(det):
                 # Rescale boxes from img_size to im0 size
-                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
+                det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()

                 # Print results
                 for c in det[:, -1].unique():
diff --git a/export.py b/export.py
index 47dbcab50144..4cf30e34fc7b 100644
--- a/export.py
+++ b/export.py
@@ -21,6 +21,7 @@
 """

 import argparse
+import json
 import os
 import subprocess
 import sys
@@ -42,8 +43,8 @@
 from models.yolo import Detect
 from utils.activations import SiLU
 from utils.datasets import LoadImages
-from utils.general import check_dataset, check_img_size, check_requirements, colorstr, file_size, print_args, \
-    url2file, LOGGER
+from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, colorstr, file_size, print_args,
+                           url2file)
 from utils.torch_utils import select_device


@@ -54,7 +55,9 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:'
         f = file.with_suffix('.torchscript.pt')

         ts = torch.jit.trace(model, im, strict=False)
-        (optimize_for_mobile(ts) if optimize else ts).save(f)
+        d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
+        extra_files = {'config.txt': json.dumps(d)}  # torch._C.ExtraFilesMap()
+        (optimize_for_mobile(ts) if optimize else ts).save(f, _extra_files=extra_files)

         LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
     except Exception as e:
@@ -117,7 +120,7 @@ def export_coreml(model, im, file, prefix=colorstr('CoreML:')):

         model.train()  # CoreML exports should be placed in model.train() mode
         ts = torch.jit.trace(model, im, strict=False)  # TorchScript model
-        ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255.0, bias=[0, 0, 0])])
+        ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
         ct_model.save(f)

         LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
@@ -135,7 +138,8 @@ def export_saved_model(model, im, file, dynamic,
     try:
         import tensorflow as tf
         from tensorflow import keras
-        from models.tf import TFModel, TFDetect
+
+        from models.tf import TFDetect, TFModel

         LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
         f = str(file).replace('.pt', '_saved_model')
@@ -182,6 +186,7 @@ def export_tflite(keras_model, im, file, int8, data, ncalib, prefix=colorstr('Te
     # YOLOv5 TensorFlow Lite export
     try:
         import tensorflow as tf
+
         from models.tf import representative_dataset_gen

         LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
@@ -215,6 +220,7 @@ def export_tfjs(keras_model, im, file, prefix=colorstr('TensorFlow.js:')):
     try:
         check_requirements(('tensorflowjs',))
         import re
+
         import tensorflowjs as tfjs

         LOGGER.info(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...')
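The export_torchscript() change above bakes model metadata (input shape, stride, class names) into the .torchscript.pt artifact through TorchScript's `_extra_files` mechanism. A minimal round-trip sketch, using a toy stand-in module and made-up metadata values rather than a real YOLOv5 checkpoint:

```python
import json

import torch
import torch.nn as nn

net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.SiLU())  # toy stand-in, not a YOLOv5 model
ts = torch.jit.trace(net, torch.zeros(1, 3, 64, 64), strict=False)
d = {'shape': [1, 3, 64, 64], 'stride': 32, 'names': ['person', 'car']}  # illustrative metadata
ts.save('toy.torchscript.pt', _extra_files={'config.txt': json.dumps(d)})  # embed metadata in the file

extra_files = {'config.txt': ''}  # dict values are populated in place on load
reloaded = torch.jit.load('toy.torchscript.pt', _extra_files=extra_files)
meta = json.loads(extra_files['config.txt'])
print(meta['stride'], meta['names'])  # 32 ['person', 'car']
```

DetectMultiBackend, added to models/common.py below, reads the same 'config.txt' entry back at load time, so TorchScript weights no longer need a Python-side checkpoint to recover stride and names.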
diff --git a/hubconf.py b/hubconf.py
index a697e033b09b..3488fef76ac5 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -27,10 +27,10 @@ def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbo
     """
     from pathlib import Path

-    from models.yolo import Model
     from models.experimental import attempt_load
-    from utils.general import check_requirements, set_logging
+    from models.yolo import Model
     from utils.downloads import attempt_download
+    from utils.general import check_requirements, intersect_dicts, set_logging
     from utils.torch_utils import select_device

     file = Path(__file__).resolve()
@@ -49,9 +49,8 @@ def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbo
             model = Model(cfg, channels, classes)  # create model
             if pretrained:
                 ckpt = torch.load(attempt_download(path), map_location=device)  # load
-                msd = model.state_dict()  # model state_dict
                 csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
-                csd = {k: v for k, v in csd.items() if msd[k].shape == v.shape}  # filter
+                csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors'])  # intersect
                 model.load_state_dict(csd, strict=False)  # load
                 if len(ckpt['model'].names) == classes:
                     model.names = ckpt['model'].names  # set class names attribute
@@ -125,10 +124,11 @@ def yolov5x6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=Tr
     # model = custom(path='path/to/model.pt')  # custom

     # Verify inference
+    from pathlib import Path
+
     import cv2
     import numpy as np
     from PIL import Image
-    from pathlib import Path

     imgs = ['data/images/zidane.jpg',  # filename
             Path('data/images/zidane.jpg'),  # Path
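hubconf.py now delegates checkpoint filtering to intersect_dicts(), the helper added to utils/general.py later in this diff. A self-contained sketch of the behaviour, with the helper copied from that hunk and two toy modules standing in for a pretrained checkpoint and a custom model:

```python
import torch.nn as nn


def intersect_dicts(da, db, exclude=()):  # copied from the utils/general.py hunk below
    # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
    return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}


ckpt = nn.Sequential(nn.Linear(10, 10), nn.Linear(10, 80))  # stands in for an 80-class checkpoint
model = nn.Sequential(nn.Linear(10, 10), nn.Linear(10, 20))  # stands in for a 20-class custom model
csd = intersect_dicts(ckpt.state_dict(), model.state_dict(), exclude=['anchors'])
model.load_state_dict(csd, strict=False)  # only the shape-compatible first layer transfers
print(sorted(csd))  # ['0.bias', '0.weight']
```

Compared with the deleted dict comprehension, the helper also tolerates checkpoint keys that are absent from the model and can skip anchors, which is why train.py imports it as well.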
diff --git a/models/common.py b/models/common.py
index 8b70a6fea595..3ea7ba5477a6 100644
--- a/models/common.py
+++ b/models/common.py
@@ -3,12 +3,14 @@
 Common modules
 """

-import logging
+import json
 import math
+import platform
 import warnings
 from copy import copy
 from pathlib import Path

+import cv2
 import numpy as np
 import pandas as pd
 import requests
@@ -18,13 +20,11 @@
 from torch.cuda import amp

 from utils.datasets import exif_transpose, letterbox
-from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, save_one_box, \
-    scale_coords, xyxy2xywh
-from utils.plots import Annotator, colors
+from utils.general import (LOGGER, check_requirements, check_suffix, colorstr, increment_path, make_divisible,
+                           non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
+from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import time_sync

-LOGGER = logging.getLogger(__name__)
-

 def autopad(k, p=None):  # kernel, padding
     # Pad to 'same'
@@ -273,6 +273,128 @@ def forward(self, x):
         return torch.cat(x, self.d)


+class DetectMultiBackend(nn.Module):
+    # YOLOv5 MultiBackend class for python inference on various backends
+    def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
+        # Usage:
+        #   PyTorch:         weights = *.pt
+        #   TorchScript:               *.torchscript.pt
+        #   CoreML:                    *.mlmodel
+        #   TensorFlow:                *_saved_model
+        #   TensorFlow:                *.pb
+        #   TensorFlow Lite:           *.tflite
+        #   ONNX Runtime:              *.onnx
+        #   OpenCV DNN:                *.onnx with dnn=True
+        super().__init__()
+        w = str(weights[0] if isinstance(weights, list) else weights)
+        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '', '.mlmodel']
+        check_suffix(w, suffixes)  # check weights have acceptable suffix
+        pt, onnx, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
+        jit = pt and 'torchscript' in w.lower()
+        stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
+
+        if jit:  # TorchScript
+            LOGGER.info(f'Loading {w} for TorchScript inference...')
+            extra_files = {'config.txt': ''}  # model metadata
+            model = torch.jit.load(w, _extra_files=extra_files)
+            if extra_files['config.txt']:
+                d = json.loads(extra_files['config.txt'])  # extra_files dict
+                stride, names = int(d['stride']), d['names']
+        elif pt:  # PyTorch
+            from models.experimental import attempt_load  # scoped to avoid circular import
+            model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
+            stride = int(model.stride.max())  # model stride
+            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+        elif coreml:  # CoreML *.mlmodel
+            import coremltools as ct
+            model = ct.models.MLModel(w)
+        elif dnn:  # ONNX OpenCV DNN
+            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
+            check_requirements(('opencv-python>=4.5.4',))
+            net = cv2.dnn.readNetFromONNX(w)
+        elif onnx:  # ONNX Runtime
+            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
+            check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
+            import onnxruntime
+            session = onnxruntime.InferenceSession(w, None)
+        else:  # TensorFlow model (TFLite, pb, saved_model)
+            import tensorflow as tf
+            if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
+                def wrap_frozen_graph(gd, inputs, outputs):
+                    x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
+                    return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
+                                   tf.nest.map_structure(x.graph.as_graph_element, outputs))
+
+                LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...')
+                graph_def = tf.Graph().as_graph_def()
+                graph_def.ParseFromString(open(w, 'rb').read())
+                frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
+            elif saved_model:
+                LOGGER.info(f'Loading {w} for TensorFlow saved_model inference...')
+                model = tf.keras.models.load_model(w)
+            elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
+                if 'edgetpu' in w.lower():
+                    LOGGER.info(f'Loading {w} for TensorFlow Edge TPU inference...')
+                    import tflite_runtime.interpreter as tfli
+                    delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
+                                'Darwin': 'libedgetpu.1.dylib',
+                                'Windows': 'edgetpu.dll'}[platform.system()]
+                    interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
+                else:
+                    LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
+                    interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
+                interpreter.allocate_tensors()  # allocate
+                input_details = interpreter.get_input_details()  # inputs
+                output_details = interpreter.get_output_details()  # outputs
+        self.__dict__.update(locals())  # assign all variables to self
+
+    def forward(self, im, augment=False, visualize=False, val=False):
+        # YOLOv5 MultiBackend inference
+        b, ch, h, w = im.shape  # batch, channel, height, width
+        if self.pt:  # PyTorch
+            y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
+            return y if val else y[0]
+        elif self.coreml:  # CoreML *.mlmodel
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
+            im = Image.fromarray((im[0] * 255).astype('uint8'))
+            # im = im.resize((192, 320), Image.ANTIALIAS)
+            y = self.model.predict({'image': im})  # coordinates are xywh normalized
+            box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
+            conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
+            y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
+        elif self.onnx:  # ONNX
+            im = im.cpu().numpy()  # torch to numpy
+            if self.dnn:  # ONNX OpenCV DNN
+                self.net.setInput(im)
+                y = self.net.forward()
+            else:  # ONNX Runtime
+                y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
+        else:  # TensorFlow model (TFLite, pb, saved_model)
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
+            if self.pb:
+                y = self.frozen_func(x=self.tf.constant(im)).numpy()
+            elif self.saved_model:
+                y = self.model(im, training=False).numpy()
+            elif self.tflite:
+                input, output = self.input_details[0], self.output_details[0]
+                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
+                if int8:
+                    scale, zero_point = input['quantization']
+                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
+                self.interpreter.set_tensor(input['index'], im)
+                self.interpreter.invoke()
+                y = self.interpreter.get_tensor(output['index'])
+                if int8:
+                    scale, zero_point = output['quantization']
+                    y = (y.astype(np.float32) - zero_point) * scale  # re-scale
+            y[..., 0] *= w  # x
+            y[..., 1] *= h  # y
+            y[..., 2] *= w  # w
+            y[..., 3] *= h  # h
+        y = torch.tensor(y)
+        return (y, []) if val else y
+
+
 class AutoShape(nn.Module):
     # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
     conf = 0.25  # NMS confidence threshold
@@ -339,7 +461,7 @@ def forward(self, imgs, size=640, augment=False, profile=False):
             x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
             x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
             x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
-            x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
+            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32

         t.append(time_sync())
         with amp.autocast(enabled=p.device.type != 'cpu'):
@@ -362,7 +484,7 @@ class Detections:
     def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
         super().__init__()
         d = pred[0].device  # device
-        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1., 1.], device=d) for im in imgs]  # normalizations
+        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs]  # normalizations
         self.imgs = imgs  # list of images as numpy arrays
         self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
         self.names = names  # class names
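Taken together with the detect.py changes above, DetectMultiBackend reduces inference to one backend-independent calling pattern. A hedged sketch of that pattern, assuming a local yolov5s.pt, the bundled data/images/zidane.jpg, and the repository root on PYTHONPATH; it mirrors the updated detect.py rather than introducing new behaviour:

```python
import torch

from models.common import DetectMultiBackend
from utils.datasets import LoadImages
from utils.general import check_img_size, non_max_suppression

device = torch.device('cpu')
model = DetectMultiBackend('yolov5s.pt', device=device, dnn=False)  # backend picked by file suffix
imgsz = check_img_size([640, 640], s=model.stride)  # enforce stride-multiple image size

dataset = LoadImages('data/images/zidane.jpg', img_size=imgsz, stride=model.stride, auto=model.pt)
for path, im, im0s, vid_cap, s in dataset:
    im = torch.from_numpy(im).to(device).float() / 255  # uint8 to fp32, 0-255 to 0.0-1.0
    if len(im.shape) == 3:
        im = im[None]  # add batch dimension
    pred = model(im)  # identical call for PyTorch, TorchScript, ONNX, CoreML or TF weights
    det = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)[0]
    print(path, det.shape)  # (n, 6) detections: xyxy, confidence, class
```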
diff --git a/models/experimental.py b/models/experimental.py
index 2e92ccb36faf..463e5514a06e 100644
--- a/models/experimental.py
+++ b/models/experimental.py
@@ -3,6 +3,7 @@
 Experimental modules
 """
 import math
+
 import numpy as np
 import torch
 import torch.nn as nn
@@ -32,7 +33,7 @@ def __init__(self, n, weight=False):  # n: number of inputs
         self.weight = weight  # apply weights boolean
         self.iter = range(n - 1)  # iter object
         if weight:
-            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights
+            self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True)  # layer weights

     def forward(self, x):
         y = x[0]  # no weight
diff --git a/models/hub/yolov5-bifpn.yaml b/models/hub/yolov5-bifpn.yaml
index 2f2c82c70122..504815f5cfa0 100644
--- a/models/hub/yolov5-bifpn.yaml
+++ b/models/hub/yolov5-bifpn.yaml
@@ -9,22 +9,22 @@ anchors:
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3, [1024, False]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

-# YOLOv5 BiFPN head
+# YOLOv5 v6.0 BiFPN head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
@@ -37,7 +37,7 @@ head:
   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
-   [[-1, 14, 6], 1, Concat, [1]],  # cat P4
+   [[-1, 14, 6], 1, Concat, [1]],  # cat P4 <--- BiFPN change
   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
diff --git a/models/hub/yolov5-fpn.yaml b/models/hub/yolov5-fpn.yaml
index 707b2136cee1..a23e9c6fbf9f 100644
--- a/models/hub/yolov5-fpn.yaml
+++ b/models/hub/yolov5-fpn.yaml
@@ -9,34 +9,34 @@ anchors:
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
-   [-1, 3, Bottleneck, [128]],
+   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, BottleneckCSP, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
-   [-1, 9, BottleneckCSP, [512]],
+   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 6, BottleneckCSP, [1024]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

-# YOLOv5 FPN head
+# YOLOv5 v6.0 FPN head
head:
-  [[-1, 3, BottleneckCSP, [1024, False]],  # 10 (P5/32-large)
+  [[-1, 3, C3, [1024, False]],  # 10 (P5/32-large)

   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Conv, [512, 1, 1]],
-   [-1, 3, BottleneckCSP, [512, False]],  # 14 (P4/16-medium)
+   [-1, 3, C3, [512, False]],  # 14 (P4/16-medium)

   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Conv, [256, 1, 1]],
-   [-1, 3, BottleneckCSP, [256, False]],  # 18 (P3/8-small)
+   [-1, 3, C3, [256, False]],  # 18 (P3/8-small)

   [[18, 14, 10], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
diff --git a/models/hub/yolov5-p2.yaml b/models/hub/yolov5-p2.yaml
index 759e9f92fb29..ffe26ebad182 100644
--- a/models/hub/yolov5-p2.yaml
+++ b/models/hub/yolov5-p2.yaml
@@ -4,24 +4,24 @@
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
-anchors: 3
+anchors: 3  # auto-anchor evolves 3 anchors per P output layer

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3, [1024, False]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

-# YOLOv5 head
+# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
diff --git a/models/hub/yolov5-p6.yaml b/models/hub/yolov5-p6.yaml
index 85e142539ce3..28f3e439cccd 100644
--- a/models/hub/yolov5-p6.yaml
+++ b/models/hub/yolov5-p6.yaml
@@ -4,26 +4,26 @@
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
-anchors: 3
+anchors: 3  # auto-anchor 3 anchors per P output layer

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
   [-1, 3, C3, [768]],
   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
-   [-1, 1, SPP, [1024, [3, 5, 7]]],
-   [-1, 3, C3, [1024, False]],  # 11
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 11
  ]

-# YOLOv5 head
+# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [768, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
@@ -50,7 +50,7 @@ head:

   [-1, 1, Conv, [768, 3, 2]],
   [[-1, 12], 1, Concat, [1]],  # cat head P6
-   [-1, 3, C3, [1024, False]],  # 32 (P5/64-xlarge)
+   [-1, 3, C3, [1024, False]],  # 32 (P6/64-xlarge)

   [[23, 26, 29, 32], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5, P6)
  ]
diff --git a/models/hub/yolov5-p7.yaml b/models/hub/yolov5-p7.yaml
index 88a7a95cbbd1..bd2f5845f884 100644
--- a/models/hub/yolov5-p7.yaml
+++ b/models/hub/yolov5-p7.yaml
@@ -4,16 +4,16 @@
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
-anchors: 3
+anchors: 3  # auto-anchor 3 anchors per P output layer

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
@@ -21,8 +21,8 @@ backbone:
   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
   [-1, 3, C3, [1024]],
   [-1, 1, Conv, [1280, 3, 2]],  # 11-P7/128
-   [-1, 1, SPP, [1280, [3, 5]]],
-   [-1, 3, C3, [1280, False]],  # 13
+   [-1, 3, C3, [1280]],
+   [-1, 1, SPPF, [1280, 5]],  # 13
  ]

# YOLOv5 head
diff --git a/models/hub/yolov5-panet.yaml b/models/hub/yolov5-panet.yaml
index 76b9b7e74e33..ccfbf900691c 100644
--- a/models/hub/yolov5-panet.yaml
+++ b/models/hub/yolov5-panet.yaml
@@ -9,40 +9,40 @@ anchors:
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
-   [-1, 3, BottleneckCSP, [128]],
+   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, BottleneckCSP, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
-   [-1, 9, BottleneckCSP, [512]],
+   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, BottleneckCSP, [1024, False]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

-# YOLOv5 PANet head
+# YOLOv5 v6.0 PANet head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
-   [-1, 3, BottleneckCSP, [512, False]],  # 13
+   [-1, 3, C3, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
-   [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
-   [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
-   [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
diff --git a/models/hub/yolov5s-ghost.yaml b/models/hub/yolov5s-ghost.yaml
index dbf2c8e03489..ff9519c3f1aa 100644
--- a/models/hub/yolov5s-ghost.yaml
+++ b/models/hub/yolov5s-ghost.yaml
@@ -9,22 +9,22 @@ anchors:
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, GhostConv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3Ghost, [128]],
   [-1, 1, GhostConv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3Ghost, [256]],
+   [-1, 6, C3Ghost, [256]],
   [-1, 1, GhostConv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3Ghost, [512]],
   [-1, 1, GhostConv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3Ghost, [1024, False]],  # 9
+   [-1, 3, C3Ghost, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

-# YOLOv5 head
+# YOLOv5 v6.0 head
head:
  [[-1, 1, GhostConv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
diff --git a/models/hub/yolov5s-transformer.yaml b/models/hub/yolov5s-transformer.yaml
index aeac1acb0582..100d7c447527 100644
--- a/models/hub/yolov5s-transformer.yaml
+++ b/models/hub/yolov5s-transformer.yaml
@@ -9,22 +9,22 @@ anchors:
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3TR, [1024, False]],  # 9 <-------- C3TR() Transformer module
+   [-1, 3, C3TR, [1024]],  # 9 <--- C3TR() Transformer module
+   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

-# YOLOv5 head
+# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
diff --git a/models/tf.py b/models/tf.py
index 6c07410e03a5..6de0245cfe50 100644
--- a/models/tf.py
+++ b/models/tf.py
@@ -28,11 +28,11 @@
 import torch.nn as nn
 from tensorflow import keras

-from models.common import Bottleneck, BottleneckCSP, Concat, Conv, C3, DWConv, Focus, SPP, SPPF, autopad
+from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, Concat, Conv, DWConv, Focus, autopad
 from models.experimental import CrossConv, MixConv2d, attempt_load
 from models.yolo import Detect
-from utils.general import make_divisible, print_args, LOGGER
 from utils.activations import SiLU
+from utils.general import LOGGER, make_divisible, print_args


 class TFBN(keras.layers.Layer):
@@ -98,7 +98,7 @@ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
         self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)

     def call(self, inputs):  # x(b,w,h,c) -> y(b,w/2,h/2,4c)
-        # inputs = inputs / 255.  # normalize 0-255 to 0-1
+        # inputs = inputs / 255  # normalize 0-255 to 0-1
         return self.conv(tf.concat([inputs[:, ::2, ::2, :],
                                     inputs[:, 1::2, ::2, :],
                                     inputs[:, ::2, 1::2, :],
@@ -227,7 +227,7 @@ def call(self, inputs):

         if not self.training:  # inference
             y = tf.sigmoid(x[i])
-            xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
+            xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
             wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
             # Normalize xywh to 0-1 to reduce calibration error
             xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
@@ -414,7 +414,7 @@ def representative_dataset_gen(dataset, ncalib=100):
     for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
         input = np.transpose(img, [1, 2, 0])
         input = np.expand_dims(input, axis=0).astype(np.float32)
-        input /= 255.0
+        input /= 255
         yield [input]
         if n >= ncalib:
             break
diff --git a/models/yolo.py b/models/yolo.py
index 38a17d9e7ba4..305f0ca0cc88 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -20,10 +20,10 @@
 from models.common import *
 from models.experimental import *
 from utils.autoanchor import check_anchor_order
-from utils.general import check_version, check_yaml, make_divisible, print_args, LOGGER
+from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
 from utils.plots import feature_visualization
-from utils.torch_utils import copy_attr, fuse_conv_and_bn, initialize_weights, model_info, scale_img, \
-    select_device, time_sync
+from utils.torch_utils import (copy_attr, fuse_conv_and_bn, initialize_weights, model_info, scale_img, select_device,
+                               time_sync)

 try:
     import thop  # for FLOPs computation
@@ -55,15 +55,15 @@ def forward(self, x):
             x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

             if not self.training:  # inference
-                if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
+                if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                     self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                 y = x[i].sigmoid()
                 if self.inplace:
-                    y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                    y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                 else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
-                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                     wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                     y = torch.cat((xy, wh, y[..., 4:]), -1)
                 z.append(y.view(bs, -1, self.no))
@@ -90,7 +90,7 @@ def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, i
         else:  # is *.yaml
             import yaml  # for torch hub
             self.yaml_file = Path(cfg).name
-            with open(cfg, errors='ignore') as f:
+            with open(cfg, encoding='ascii', errors='ignore') as f:
                 self.yaml = yaml.safe_load(f)  # model dict

         # Define model
@@ -201,7 +201,7 @@ def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is
         for mi, s in zip(m.m, m.stride):  # from
             b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
             b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
-            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
+            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # cls
             mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

     def _print_biases(self):
@@ -306,6 +306,7 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
     parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     parser.add_argument('--profile', action='store_true', help='profile model speed')
+    parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
     opt = parser.parse_args()
     opt.cfg = check_yaml(opt.cfg)  # check YAML
     print_args(FILE.stem, opt)
@@ -320,6 +321,14 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
         img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
         y = model(img, profile=True)

+    # Test all models
+    if opt.test:
+        for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
+            try:
+                _ = Model(cfg)
+            except Exception as e:
+                print(f'Error in {cfg}: {e}')
+
     # Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898)
     # from torch.utils.tensorboard import SummaryWriter
     # tb_writer = SummaryWriter('.')
diff --git a/setup.cfg b/setup.cfg
index 7d25200cdb33..4ca0f0d7aabb 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -43,3 +43,9 @@ ignore =
     F403
     E302
     F541
+
+
+[isort]
+# https://pycqa.github.io/isort/docs/configuration/options.html
+line_length = 120
+multi_line_output = 0
diff --git a/train.py b/train.py
index 736edd036fad..698d031ad3c6 100644
--- a/train.py
+++ b/train.py
@@ -7,13 +7,13 @@
 """

 import argparse
-import logging
 import math
 import os
 import random
 import sys
 import time
 from copy import deepcopy
+from datetime import datetime
 from pathlib import Path

 import numpy as np
@@ -23,7 +23,7 @@
 import yaml
 from torch.cuda import amp
 from torch.nn.parallel import DistributedDataParallel as DDP
-from torch.optim import Adam, SGD, lr_scheduler
+from torch.optim import SGD, Adam, lr_scheduler
 from tqdm import tqdm

 FILE = Path(__file__).resolve()
@@ -37,19 +37,19 @@
 from models.yolo import Model
 from utils.autoanchor import check_anchors
 from utils.autobatch import check_train_batch_size
+from utils.callbacks import Callbacks
 from utils.datasets import create_dataloader
-from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
-    strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \
-    check_file, check_yaml, check_suffix, print_args, print_mutation, one_cycle, colorstr, methods, LOGGER
 from utils.downloads import attempt_download
-from utils.loss import ComputeLoss
-from utils.plots import plot_labels, plot_evolve
-from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, intersect_dicts, select_device, \
-    torch_distributed_zero_first
+from utils.general import (LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements,
+                           check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds,
+                           intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle,
+                           print_args, print_mutation, strip_optimizer)
+from utils.loggers import Loggers
 from utils.loggers.wandb.wandb_utils import check_wandb_resume
+from utils.loss import ComputeLoss
 from utils.metrics import fitness
-from utils.loggers import Loggers
-from utils.callbacks import Callbacks
+from utils.plots import plot_evolve, plot_labels
+from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first

 LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
 RANK = int(os.getenv('RANK', -1))
@@ -105,7 +105,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
     names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
     assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'  # check
-    is_coco = data.endswith('coco.yaml') and nc == 80  # COCO dataset
+    is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt')  # COCO dataset

     # Model
     check_suffix(weights, '.pt')  # check weights
@@ -200,8 +200,8 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary

     # DP mode
     if cuda and RANK == -1 and torch.cuda.device_count() > 1:
-        logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
-                        'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
+        LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
+                       'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
         model = torch.nn.DataParallel(model)

     # SyncBatchNorm
@@ -246,9 +246,9 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary

     # Model parameters
     nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
-    hyp['box'] *= 3. / nl  # scale to layers
-    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
-    hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl  # scale to image size and layers
+    hyp['box'] *= 3 / nl  # scale to layers
+    hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
+    hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
     hyp['label_smoothing'] = opt.label_smoothing
     model.nc = nc  # attach number of classes to model
     model.hyp = hyp  # attach hyperparameters to model
@@ -293,7 +293,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
         optimizer.zero_grad()
         for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
             ni = i + nb * epoch  # number integrated batches (since train start)
-            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
+            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0

             # Warmup
             if ni <= nw:
@@ -380,7 +380,8 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
                         'ema': deepcopy(ema.ema).half(),
                         'updates': ema.updates,
                         'optimizer': optimizer.state_dict(),
-                        'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None}
+                        'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None,
+                        'date': datetime.now().isoformat()}

                 # Save last, best and delete
                 torch.save(ckpt, last)
diff --git a/tutorial.ipynb b/tutorial.ipynb
index 9184a66d3f42..7763a26066e2 100644
--- a/tutorial.ipynb
+++ b/tutorial.ipynb
@@ -368,7 +368,7 @@
         "colab_type": "text"
       },
       "source": [
-        "\"Open"
+        "\"Open"
      ]
    },
    {
@@ -402,26 +402,24 @@
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
-        "outputId": "e2e839d5-d6fc-409c-e44c-0b0b6aa9319d"
+        "outputId": "3809e5a9-dd41-4577-fe62-5531abf7cca2"
      },
      "source": [
-        "!git clone https://github.com/ultralytics/yolov5  # clone repo\n",
+        "!git clone https://github.com/ultralytics/yolov5  # clone\n",
        "%cd yolov5\n",
-        "%pip install -qr requirements.txt  # install dependencies\n",
+        "%pip install -qr requirements.txt  # install\n",
        "\n",
-        "import torch\n",
-        "from IPython.display import Image, clear_output  # to display images\n",
-        "\n",
-        "clear_output()\n",
-        "print(f\"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})\")"
+        "from yolov5 import utils\n",
+        "display = utils.notebook_init()  # checks"
      ],
-      "execution_count": 11,
+      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
-            "Setup complete. Using torch 1.10.0+cu102 (Tesla V100-SXM2-16GB)\n"
+            "YOLOv5 🚀 v6.0-48-g84a8099 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n",
+            "Setup complete ✅\n"
          ]
        }
      ]
@@ -438,11 +436,11 @@
        "\n",
        "```shell\n",
        "python detect.py --source 0  # webcam\n",
-        "                          file.jpg  # image \n",
-        "                          file.mp4  # video\n",
+        "                          img.jpg  # image \n",
+        "                          vid.mp4  # video\n",
        "                          path/  # directory\n",
        "                          path/*.jpg  # glob\n",
-        "                          'https://youtu.be/NUsoVlDFqZg'  # YouTube\n",
+        "                          'https://youtu.be/Zgi9g1ksQHc'  # YouTube\n",
        "                          'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream\n",
        "```"
      ]
@@ -458,9 +456,9 @@
      },
      "source": [
        "!python detect.py --weights yolov5s.pt --img 640 --conf 0.25 --source data/images\n",
-        "Image(filename='runs/detect/exp/zidane.jpg', width=600)"
+        "display.Image(filename='runs/detect/exp/zidane.jpg', width=600)"
      ],
-      "execution_count": 17,
+      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -537,7 +535,7 @@
        "torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017val.zip', 'tmp.zip')\n",
        "!unzip -q tmp.zip -d ../datasets && rm tmp.zip"
      ],
-      "execution_count": 18,
+      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
@@ -568,7 +566,7 @@
        "# Run YOLOv5x on COCO val\n",
        "!python val.py --weights yolov5x.pt --data coco.yaml --img 640 --iou 0.65 --half"
      ],
-      "execution_count": 19,
+      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -726,7 +724,7 @@
        "# Train YOLOv5s on COCO128 for 3 epochs\n",
        "!python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --cache"
      ],
-      "execution_count": 24,
+      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
diff --git a/utils/__init__.py b/utils/__init__.py
index e69de29bb2d1..2b0c896364a2 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -0,0 +1,18 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+utils/initialization
+"""
+
+
+def notebook_init():
+    # For YOLOv5 notebooks
+    print('Checking setup...')
+    from IPython import display  # to display images and clear console output
+
+    from utils.general import emojis
+    from utils.torch_utils import select_device  # YOLOv5 imports
+
+    display.clear_output()
+    select_device(newline=False)
+    print(emojis('Setup complete ✅'))
+    return display
diff --git a/utils/activations.py b/utils/activations.py
index 62eb532b3f95..4c7d46c32104 100644
--- a/utils/activations.py
+++ b/utils/activations.py
@@ -19,7 +19,7 @@ class Hardswish(nn.Module):  # export-friendly version of nn.Hardswish()
     @staticmethod
     def forward(x):
         # return x * F.hardsigmoid(x)  # for torchscript and CoreML
-        return x * F.hardtanh(x + 3, 0., 6.) / 6.  # for torchscript, CoreML and ONNX
+        return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0  # for torchscript, CoreML and ONNX


 # Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
diff --git a/utils/augmentations.py b/utils/augmentations.py
index b3cbbf913b65..5dcfd49fdd05 100644
--- a/utils/augmentations.py
+++ b/utils/augmentations.py
@@ -3,14 +3,13 @@
 Image augmentation functions
 """

-import logging
 import math
 import random

 import cv2
 import numpy as np

-from utils.general import colorstr, segment2box, resample_segments, check_version
+from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box
 from utils.metrics import bbox_ioa


@@ -32,11 +31,11 @@ def __init__(self):
                 A.ImageCompression(quality_lower=75, p=0.0)],
                 bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

-            logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
+            LOGGER.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
         except ImportError:  # package not installed, skip
             pass
         except Exception as e:
-            logging.info(colorstr('albumentations: ') + f'{e}')
+            LOGGER.info(colorstr('albumentations: ') + f'{e}')

     def __call__(self, im, labels, p=1.0):
         if self.transform and random.random() < p:
@@ -124,7 +123,7 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleF

 def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                        border=(0, 0)):
-    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
+    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
     # targets = [cls, xyxy]

     height = im.shape[0] + border[0] * 2  # shape(h,w,c)
diff --git a/utils/autoanchor.py b/utils/autoanchor.py
index 6b3c661be2f7..af0aa7de65ac 100644
--- a/utils/autoanchor.py
+++ b/utils/autoanchor.py
@@ -34,10 +34,10 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640):

     def metric(k):  # compute metric
         r = wh[:, None] / k[None]
-        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
+        x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
         best = x.max(1)[0]  # best_x
-        aat = (x > 1. / thr).float().sum(1).mean()  # anchors above threshold
-        bpr = (best > 1. / thr).float().mean()  # best possible recall
+        aat = (x > 1 / thr).float().sum(1).mean()  # anchors above threshold
+        bpr = (best > 1 / thr).float().mean()  # best possible recall
         return bpr, aat

     anchors = m.anchors.clone() * m.stride.to(m.anchors.device).view(-1, 1, 1)  # current anchors
@@ -80,12 +80,12 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen
     """
     from scipy.cluster.vq import kmeans

-    thr = 1. / thr
+    thr = 1 / thr
     prefix = colorstr('autoanchor: ')

     def metric(k, wh):  # compute metrics
         r = wh[:, None] / k[None]
-        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
+        x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
         # x = wh_iou(wh, torch.tensor(k))  # iou metric
         return x, x.max(1)[0]  # x, best_x

diff --git a/utils/autobatch.py b/utils/autobatch.py
index 168b16f691ab..3f2b4d1a4c38 100644
--- a/utils/autobatch.py
+++ b/utils/autobatch.py
@@ -35,11 +35,12 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16):
         return batch_size

     d = str(device).upper()  # 'CUDA:0'
-    t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3  # (GB)
-    r = torch.cuda.memory_reserved(device) / 1024 ** 3  # (GB)
-    a = torch.cuda.memory_allocated(device) / 1024 ** 3  # (GB)
+    properties = torch.cuda.get_device_properties(device)  # device properties
+    t = properties.total_memory / 1024 ** 3  # (GiB)
+    r = torch.cuda.memory_reserved(device) / 1024 ** 3  # (GiB)
+    a = torch.cuda.memory_allocated(device) / 1024 ** 3  # (GiB)
     f = t - (r + a)  # free inside reserved
-    print(f'{prefix}{d} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free')
+    print(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')

     batch_sizes = [1, 2, 4, 8, 16]
     try:
@@ -52,5 +53,5 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16):
     batch_sizes = batch_sizes[:len(y)]
     p = np.polyfit(batch_sizes, y, deg=1)  # first degree polynomial fit
     b = int((f * fraction - p[1]) / p[0])  # y intercept (optimal batch size)
-    print(f'{prefix}Using colorstr(batch-size {b}) for {d} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)')
+    print(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%)')
     return b
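The autobatch() hunk above keeps the same estimation logic: profile memory use at a few batch sizes, fit a first-degree polynomial, and solve for the batch size that fills the target fraction of free memory. A numeric sketch with made-up measurements (the readings and free-memory figure are illustrative only):

```python
import numpy as np

batch_sizes = [1, 2, 4, 8, 16]
y = [1.2, 1.9, 3.3, 6.1, 11.8]  # hypothetical GiB consumed at each batch size
p = np.polyfit(batch_sizes, y, deg=1)  # memory ~= p[0] * batch_size + p[1]
f = 14.5  # hypothetical free GiB, i.e. total - (reserved + allocated)
fraction = 0.9  # target utilisation, the autobatch() default
b = int((f * fraction - p[1]) / p[0])  # solve p[0] * b + p[1] = f * fraction
print(b)  # 17 with these made-up numbers
```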
diff --git a/utils/datasets.py b/utils/datasets.py
index 7fce122942f7..94acaaa92cd7 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -6,13 +6,12 @@
 import glob
 import hashlib
 import json
-import logging
 import os
 import random
 import shutil
 import time
 from itertools import repeat
-from multiprocessing.pool import ThreadPool, Pool
+from multiprocessing.pool import Pool, ThreadPool
 from pathlib import Path
 from threading import Thread
 from zipfile import ZipFile
@@ -22,13 +21,13 @@
 import torch
 import torch.nn.functional as F
 import yaml
-from PIL import Image, ImageOps, ExifTags
+from PIL import ExifTags, Image, ImageOps
 from torch.utils.data import Dataset
 from tqdm import tqdm
 
 from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
-from utils.general import check_dataset, check_requirements, check_yaml, clean_str, segments2boxes, \
-    xywh2xyxy, xywhn2xyxy, xyxy2xywhn, xyn2xy, LOGGER
+from utils.general import (LOGGER, check_dataset, check_requirements, check_yaml, clean_str, segments2boxes, xyn2xy,
+                           xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
 from utils.torch_utils import torch_distributed_zero_first
 
 # Parameters
@@ -335,7 +334,7 @@ def update(self, i, cap, stream):
                 if success:
                     self.imgs[i] = im
                 else:
-                    LOGGER.warn('WARNING: Video stream unresponsive, please check your IP camera connection.')
+                    LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.')
                     self.imgs[i] *= 0
                     cap.open(stream)  # re-open stream if signal was lost
             time.sleep(1 / self.fps[i])  # wait time
@@ -427,7 +426,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
             d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
             tqdm(None, desc=prefix + d, total=n, initial=n)  # display cache results
             if cache['msgs']:
-                logging.info('\n'.join(cache['msgs']))  # display warnings
+                LOGGER.info('\n'.join(cache['msgs']))  # display warnings
         assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}'
 
         # Read cache
@@ -525,9 +524,9 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
         pbar.close()
         if msgs:
-            logging.info('\n'.join(msgs))
+            LOGGER.info('\n'.join(msgs))
         if nf == 0:
-            logging.info(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
+            LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
         x['hash'] = get_hash(self.label_files + self.img_files)
         x['results'] = nf, nm, ne, nc, len(self.img_files)
         x['msgs'] = msgs  # warnings
@@ -535,9 +534,9 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
         try:
             np.save(path, x)  # save cache for next time
             path.with_suffix('.cache.npy').rename(path)  # remove .npy suffix
-            logging.info(f'{prefix}New cache created: {path}')
+            LOGGER.info(f'{prefix}New cache created: {path}')
         except Exception as e:
-            logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # path not writeable
+            LOGGER.warning(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # not writeable
         return x
 
     def __len__(self):
@@ -634,13 +633,13 @@ def collate_fn4(batch):
         n = len(shapes) // 4
         img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
 
-        ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
-        wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
-        s = torch.tensor([[1, 1, .5, .5, .5, .5]])  # scale
+        ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
+        wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
+        s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]])  # scale
         for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
             i *= 4
             if random.random() < 0.5:
-                im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
+                im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear', align_corners=False)[
                     0].type(img[i].type())
                 l = label[i]
             else:
diff --git a/utils/flask_rest_api/restapi.py b/utils/flask_rest_api/restapi.py
index a54e2309715c..b93ad16a0f58 100644
--- a/utils/flask_rest_api/restapi.py
+++ b/utils/flask_rest_api/restapi.py
@@ -5,8 +5,8 @@
 import io
 
 import torch
-from PIL import Image
 from flask import Flask, request
+from PIL import Image
 
 app = Flask(__name__)
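# NOTE (illustration only, not part of the patch): the logging changes above in a nutshell —
# one shared named logger instead of root `logging` calls, and Logger.warning() instead of
# the deprecated Logger.warn() alias.
import logging

logging.basicConfig(format='%(message)s', level=logging.INFO)
LOGGER = logging.getLogger('yolov5')  # configurable in one place, reused across modules
LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.')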
diff --git a/utils/general.py b/utils/general.py
index d8cac8daac22..8f59d487edfb 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -45,7 +45,7 @@ def set_logging(name=None, verbose=True):
     # Sets level and returns logger
     rank = int(os.getenv('RANK', -1))  # rank in world for Multi-GPU trainings
-    logging.basicConfig(format="%(message)s", level=logging.INFO if (verbose and rank in (-1, 0)) else logging.WARN)
+    logging.basicConfig(format="%(message)s", level=logging.INFO if (verbose and rank in (-1, 0)) else logging.WARNING)
     return logging.getLogger(name)
 
 
@@ -125,6 +125,11 @@ def init_seeds(seed=0):
     cudnn.benchmark, cudnn.deterministic = (False, True) if seed == 0 else (True, False)
 
 
+def intersect_dicts(da, db, exclude=()):
+    # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
+    return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
+
+
 def get_latest_run(search_dir='.'):
     # Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
     last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
@@ -259,7 +264,8 @@ def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), insta
     if isinstance(requirements, (str, Path)):  # requirements.txt file
         file = Path(requirements)
         assert file.exists(), f"{prefix} {file.resolve()} not found, check failed."
-        requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(file.open()) if x.name not in exclude]
+        with file.open() as f:
+            requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude]
     else:  # list or tuple of packages
         requirements = [x for x in requirements if x not in exclude]
 
@@ -338,9 +344,12 @@ def check_file(file, suffix=''):
     elif file.startswith(('http:/', 'https:/')):  # download
         url = str(Path(file)).replace(':/', '://')  # Pathlib turns :// -> :/
         file = Path(urllib.parse.unquote(file).split('?')[0]).name  # '%2F' to '/', split https://url.com/file.txt?auth
-        print(f'Downloading {url} to {file}...')
-        torch.hub.download_url_to_file(url, file)
-        assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}'  # check
+        if Path(file).is_file():
+            print(f'Found {url} locally at {file}')  # file already exists
+        else:
+            print(f'Downloading {url} to {file}...')
+            torch.hub.download_url_to_file(url, file)
+            assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}'  # check
         return file
     else:  # search
         files = []
@@ -777,7 +786,8 @@ def print_mutation(results, hyp, save_dir, bucket):
 
 
 def apply_classifier(x, model, img, im0):
-    # Apply a second stage classifier to yolo outputs
+    # Apply a second stage classifier to YOLO outputs
+    # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval()
     im0 = [im0] if isinstance(im0, np.ndarray) else im0
     for i, d in enumerate(x):  # per image
         if d is not None and len(d):
@@ -802,7 +812,7 @@ def apply_classifier(x, model, img, im0):
 
             im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
             im = np.ascontiguousarray(im, dtype=np.float32)  # uint8 to float32
-            im /= 255.0  # 0 - 255 to 0.0 - 1.0
+            im /= 255  # 0 - 255 to 0.0 - 1.0
             ims.append(im)
 
         pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1)  # classifier prediction
@@ -811,33 +821,16 @@ def apply_classifier(x, model, img, im0):
     return x
 
 
-def save_one_box(xyxy, im, file='image.jpg', gain=1.02, pad=10, square=False, BGR=False, save=True):
-    # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
-    xyxy = torch.tensor(xyxy).view(-1, 4)
-    b = xyxy2xywh(xyxy)  # boxes
-    if square:
-        b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
-    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
-    xyxy = xywh2xyxy(b).long()
-    clip_coords(xyxy, im.shape)
-    crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
-    if save:
-        cv2.imwrite(str(increment_path(file, mkdir=True).with_suffix('.jpg')), crop)
-    return crop
-
-
 def increment_path(path, exist_ok=False, sep='', mkdir=False):
     # Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.
     path = Path(path)  # os-agnostic
     if path.exists() and not exist_ok:
-        suffix = path.suffix
-        path = path.with_suffix('')
+        path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '')
         dirs = glob.glob(f"{path}{sep}*")  # similar paths
         matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs]
         i = [int(m.groups()[0]) for m in matches if m]  # indices
         n = max(i) + 1 if i else 2  # increment number
-        path = Path(f"{path}{sep}{n}{suffix}")  # update path
-    dir = path if path.suffix == '' else path.parent  # directory
-    if not dir.exists() and mkdir:
-        dir.mkdir(parents=True, exist_ok=True)  # make directory
+        path = Path(f"{path}{sep}{n}{suffix}")  # increment path
+    if mkdir:
+        path.mkdir(parents=True, exist_ok=True)  # make directory
     return path
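# NOTE (illustration only, not part of the patch): a runnable sketch of the new
# intersect_dicts() above; the toy state dicts here are invented. In practice it supports
# partial checkpoint loading, e.g. model.load_state_dict(intersect_dicts(csd, sd), strict=False).
import torch
from utils.general import intersect_dicts

da = {'conv.weight': torch.zeros(3, 3), 'anchors': torch.zeros(2)}
db = {'conv.weight': torch.ones(3, 3), 'anchors': torch.ones(3)}
print(intersect_dicts(da, db, exclude=['anchors']))  # keeps only 'conv.weight', with da's values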
diff --git a/utils/loggers/wandb/sweep.py b/utils/loggers/wandb/sweep.py
index 6029f6b8039d..206059bc30bf 100644
--- a/utils/loggers/wandb/sweep.py
+++ b/utils/loggers/wandb/sweep.py
@@ -8,10 +8,10 @@
 if str(ROOT) not in sys.path:
     sys.path.append(str(ROOT))  # add ROOT to PATH
 
-from train import train, parse_opt
+from train import parse_opt, train
+from utils.callbacks import Callbacks
 from utils.general import increment_path
 from utils.torch_utils import select_device
-from utils.callbacks import Callbacks
 
 
 def sweep():
diff --git a/utils/loggers/wandb/wandb_utils.py b/utils/loggers/wandb/wandb_utils.py
index 8546ec6c63cb..a71bc6ce96d2 100644
--- a/utils/loggers/wandb/wandb_utils.py
+++ b/utils/loggers/wandb/wandb_utils.py
@@ -16,8 +16,7 @@
 if str(ROOT) not in sys.path:
     sys.path.append(str(ROOT))  # add ROOT to PATH
 
-from utils.datasets import LoadImagesAndLabels
-from utils.datasets import img2label_paths
+from utils.datasets import LoadImagesAndLabels, img2label_paths
 from utils.general import check_dataset, check_file
 
 try:
diff --git a/utils/loss.py b/utils/loss.py
index e8ce42ad994a..194c8e503e0e 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -108,7 +108,7 @@ def __init__(self, model, autobalance=False):
             BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
 
         det = model.module.model[-1] if is_parallel(model) else model.model[-1]  # Detect() module
-        self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02])  # P3-P7
+        self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02])  # P3-P7
         self.ssi = list(det.stride).index(16) if autobalance else 0  # stride 16 index
         self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
         for k in 'na', 'nc', 'nl', 'anchors':
@@ -129,7 +129,7 @@ def __call__(self, p, targets):  # predictions, targets, model
                 ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
 
                 # Regression
-                pxy = ps[:, :2].sigmoid() * 2. - 0.5
+                pxy = ps[:, :2].sigmoid() * 2 - 0.5
                 pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
                 pbox = torch.cat((pxy, pwh), 1)  # predicted box
                 iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
@@ -189,15 +189,15 @@ def build_targets(self, p, targets):
             if nt:
                 # Matches
                 r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
-                j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t']  # compare
+                j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t']  # compare
                 # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
                 t = t[j]  # filter
 
                 # Offsets
                 gxy = t[:, 2:4]  # grid xy
                 gxi = gain[[2, 3]] - gxy  # inverse
-                j, k = ((gxy % 1. < g) & (gxy > 1.)).T
-                l, m = ((gxi % 1. < g) & (gxi > 1.)).T
+                j, k = ((gxy % 1 < g) & (gxy > 1)).T
+                l, m = ((gxi % 1 < g) & (gxi > 1)).T
                 j = torch.stack((torch.ones_like(j), j, k, l, m))
                 t = t.repeat((5, 1, 1))[j]
                 offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
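# NOTE (illustration only, not part of the patch): the box decode touched above, shown on
# its own. The sigmoid outputs are rescaled so xy stays within (-0.5, 1.5) of the grid cell
# and wh within (0, 4) times the anchor.
import torch

t = torch.tensor([-4.0, 0.0, 4.0])   # raw network outputs
pxy = t.sigmoid() * 2 - 0.5          # -> tensor([-0.4641,  0.5000,  1.4641])
pwh = (t.sigmoid() * 2) ** 2         # -> tensor([0.0013, 1.0000, 3.8574]), anchor multiplier
print(pxy, pwh)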
diff --git a/utils/plots.py b/utils/plots.py
index 00cda6d8d986..b5e25d668d22 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -17,7 +17,7 @@
 import torch
 from PIL import Image, ImageDraw, ImageFont
 
-from utils.general import user_config_dir, is_ascii, is_chinese, xywh2xyxy, xyxy2xywh
+from utils.general import clip_coords, increment_path, is_ascii, is_chinese, user_config_dir, xywh2xyxy, xyxy2xywh
 from utils.metrics import fitness
 
 # Settings
@@ -117,6 +117,33 @@ def result(self):
         return np.asarray(self.im)
 
 
+def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
+    """
+    x: Features to be visualized
+    module_type: Module type
+    stage: Module stage within model
+    n: Maximum number of feature maps to plot
+    save_dir: Directory to save results
+    """
+    if 'Detect' not in module_type:
+        batch, channels, height, width = x.shape  # batch, channels, height, width
+        if height > 1 and width > 1:
+            f = f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
+
+            blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
+            n = min(n, channels)  # number of plots
+            fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # n/8 rows x 8 cols
+            ax = ax.ravel()
+            plt.subplots_adjust(wspace=0.05, hspace=0.05)
+            for i in range(n):
+                ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
+                ax[i].axis('off')
+
+            print(f'Saving {save_dir / f}... ({n}/{channels})')
+            plt.savefig(save_dir / f, dpi=300, bbox_inches='tight')
+            plt.close()
+
+
 def hist2d(x, y, n=100):
     # 2d histogram used in labels.png and evolve.png
     xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n)
@@ -155,7 +182,7 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max
     if isinstance(targets, torch.Tensor):
         targets = targets.cpu().numpy()
     if np.max(images[0]) <= 1:
-        images *= 255.0  # de-normalise (optional)
+        images *= 255  # de-normalise (optional)
     bs, _, h, w = images.shape  # batch size, _, height, width
     bs = min(bs, max_subplots)  # limit plot images
     ns = np.ceil(bs ** 0.5)  # number of subplots (square)
@@ -337,37 +364,6 @@ def plot_labels(labels, names=(), save_dir=Path('')):
     plt.close()
 
 
-def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
-    # Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection()
-    ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel()
-    s = ['Images', 'Free Storage (GB)', 'RAM Usage (GB)', 'Battery', 'dt_raw (ms)', 'dt_smooth (ms)', 'real-world FPS']
-    files = list(Path(save_dir).glob('frames*.txt'))
-    for fi, f in enumerate(files):
-        try:
-            results = np.loadtxt(f, ndmin=2).T[:, 90:-30]  # clip first and last rows
-            n = results.shape[1]  # number of rows
-            x = np.arange(start, min(stop, n) if stop else n)
-            results = results[:, x]
-            t = (results[0] - results[0].min())  # set t0=0s
-            results[0] = x
-            for i, a in enumerate(ax):
-                if i < len(results):
-                    label = labels[fi] if len(labels) else f.stem.replace('frames_', '')
-                    a.plot(t, results[i], marker='.', label=label, linewidth=1, markersize=5)
-                    a.set_title(s[i])
-                    a.set_xlabel('time (s)')
-                    # if fi == len(files) - 1:
-                    #     a.set_ylim(bottom=0)
-                    for side in ['top', 'right']:
-                        a.spines[side].set_visible(False)
-                else:
-                    a.remove()
-        except Exception as e:
-            print(f'Warning: Plotting error for {f}; {e}')
-    ax[1].legend()
-    plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
-
-
 def plot_evolve(evolve_csv='path/to/evolve.csv'):  # from utils.plots import *; plot_evolve()
     # Plot evolve.csv hyp evolution results
     evolve_csv = Path(evolve_csv)
@@ -420,28 +416,48 @@ def plot_results(file='path/to/results.csv', dir=''):
     plt.close()
 
 
-def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
-    """
-    x: Features to be visualized
-    module_type: Module type
-    stage: Module stage within model
-    n: Maximum number of feature maps to plot
-    save_dir: Directory to save results
-    """
-    if 'Detect' not in module_type:
-        batch, channels, height, width = x.shape  # batch, channels, height, width
-        if height > 1 and width > 1:
-            f = f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
+def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
+    # Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection()
+    ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel()
+    s = ['Images', 'Free Storage (GB)', 'RAM Usage (GB)', 'Battery', 'dt_raw (ms)', 'dt_smooth (ms)', 'real-world FPS']
+    files = list(Path(save_dir).glob('frames*.txt'))
+    for fi, f in enumerate(files):
+        try:
+            results = np.loadtxt(f, ndmin=2).T[:, 90:-30]  # clip first and last rows
+            n = results.shape[1]  # number of rows
+            x = np.arange(start, min(stop, n) if stop else n)
+            results = results[:, x]
+            t = (results[0] - results[0].min())  # set t0=0s
+            results[0] = x
+            for i, a in enumerate(ax):
+                if i < len(results):
+                    label = labels[fi] if len(labels) else f.stem.replace('frames_', '')
+                    a.plot(t, results[i], marker='.', label=label, linewidth=1, markersize=5)
+                    a.set_title(s[i])
+                    a.set_xlabel('time (s)')
+                    # if fi == len(files) - 1:
+                    #     a.set_ylim(bottom=0)
+                    for side in ['top', 'right']:
+                        a.spines[side].set_visible(False)
+                else:
+                    a.remove()
+        except Exception as e:
+            print(f'Warning: Plotting error for {f}; {e}')
+    ax[1].legend()
+    plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
 
-            blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
-            n = min(n, channels)  # number of plots
-            fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # 8 rows x n/8 cols
-            ax = ax.ravel()
-            plt.subplots_adjust(wspace=0.05, hspace=0.05)
-            for i in range(n):
-                ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
-                ax[i].axis('off')
 
-            print(f'Saving {save_dir / f}... ({n}/{channels})')
-            plt.savefig(save_dir / f, dpi=300, bbox_inches='tight')
-            plt.close()
+def save_one_box(xyxy, im, file=Path('image.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
+    # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
+    xyxy = torch.tensor(xyxy).view(-1, 4)
+    b = xyxy2xywh(xyxy)  # boxes
+    if square:
+        b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
+    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
+    xyxy = xywh2xyxy(b).long()
+    clip_coords(xyxy, im.shape)
+    crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
+    if save:
+        file.parent.mkdir(parents=True, exist_ok=True)  # make directory
+        cv2.imwrite(str(increment_path(file).with_suffix('.jpg')), crop)
+    return crop
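# NOTE (illustration only, not part of the patch): feature_visualization() above is normally
# called per layer from the model forward pass; a direct call with a fake activation:
import torch
from pathlib import Path
from utils.plots import feature_visualization

x = torch.rand(1, 64, 80, 80)  # hypothetical P3-level feature map, BCHW
feature_visualization(x, 'models.common.C3', stage=2, n=32, save_dir=Path('.'))
# -> saves ./stage2_C3_features.png with the first 32 channel maps of batch image 0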
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index 6e619d9c6955..16289104eb48 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -4,7 +4,6 @@
 """
 
 import datetime
-import logging
 import math
 import os
 import platform
@@ -18,7 +17,6 @@
 import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-import torchvision
 
 from utils.general import LOGGER
 
@@ -55,7 +53,7 @@ def git_describe(path=Path(__file__).parent):  # path must be a directory
         return ''  # not a git repository
 
 
-def select_device(device='', batch_size=None):
+def select_device(device='', batch_size=None, newline=True):
     # device = 'cpu' or '0' or '0,1,2,3'
     s = f'YOLOv5 🚀 {git_describe() or date_modified()} torch {torch.__version__} '  # string
     device = str(device).strip().lower().replace('cuda:', '')  # to string, 'cuda:0' to '0'
@@ -79,6 +77,8 @@ def select_device(device='', batch_size=None):
     else:
         s += 'CPU\n'
 
+    if not newline:
+        s = s.rstrip()
     LOGGER.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s)  # emoji-safe
     return torch.device('cuda:0' if cuda else 'cpu')
 
@@ -100,7 +100,6 @@ def profile(input, ops, n=10, device=None):
     #     profile(input, [m1, m2], n=100)  # profile over 100 iterations
 
     results = []
-    logging.basicConfig(format="%(message)s", level=logging.INFO)
     device = device or select_device()
     print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
           f"{'input':>24s}{'output':>24s}")
@@ -111,7 +110,7 @@ def profile(input, ops, n=10, device=None):
     for m in ops if isinstance(ops, list) else [ops]:
         m = m.to(device) if hasattr(m, 'to') else m  # device
         m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
-        tf, tb, t = 0., 0., [0., 0., 0.]  # dt forward, backward
+        tf, tb, t = 0, 0, [0, 0, 0]  # dt forward, backward
         try:
             flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2  # GFLOPs
         except:
@@ -153,11 +152,6 @@ def de_parallel(model):
     return model.module if is_parallel(model) else model
 
 
-def intersect_dicts(da, db, exclude=()):
-    # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
-    return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
-
-
 def initialize_weights(model):
     for m in model.modules():
         t = type(m)
@@ -177,7 +171,7 @@ def find_modules(model, mclass=nn.Conv2d):
 
 def sparsity(model):
     # Return global model sparsity
-    a, b = 0., 0.
+    a, b = 0, 0
     for p in model.parameters():
         a += p.numel()
         b += (p == 0).sum()
@@ -242,25 +236,6 @@ def model_info(model, verbose=False, img_size=640):
     LOGGER.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
 
 
-def load_classifier(name='resnet101', n=2):
-    # Loads a pretrained model reshaped to n-class output
-    model = torchvision.models.__dict__[name](pretrained=True)
-
-    # ResNet model properties
-    # input_size = [3, 224, 224]
-    # input_space = 'RGB'
-    # input_range = [0, 1]
-    # mean = [0.485, 0.456, 0.406]
-    # std = [0.229, 0.224, 0.225]
-
-    # Reshape output to n classes
-    filters = model.fc.weight.shape[1]
-    model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
-    model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
-    model.fc.out_features = n
-    return model
-
-
 def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
     # scales img(bs,3,y,x) by ratio constrained to gs-multiple
     if ratio == 1.0:
@@ -336,7 +311,7 @@ def update(self, model):
             for k, v in self.ema.state_dict().items():
                 if v.dtype.is_floating_point:
                     v *= d
-                    v += (1. - d) * msd[k].detach()
+                    v += (1 - d) * msd[k].detach()
 
     def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
         # Update EMA attributes
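# NOTE (illustration only, not part of the patch): the EMA update touched above, written out
# for a single scalar parameter.
import torch

d = 0.9999                 # decay
v = torch.tensor(1.0)      # EMA shadow value
msd_k = torch.tensor(0.0)  # current model value for the same key
v *= d
v += (1 - d) * msd_k       # i.e. v = d * v + (1 - d) * msd_k
print(v)                   # tensor(0.9999)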
diff --git a/val.py b/val.py
index 4aab87e275d3..7f23b8704de5 100644
--- a/val.py
+++ b/val.py
@@ -23,15 +23,15 @@
     sys.path.append(str(ROOT))  # add ROOT to PATH
 ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
 
-from models.experimental import attempt_load
+from models.common import DetectMultiBackend
+from utils.callbacks import Callbacks
 from utils.datasets import create_dataloader
-from utils.general import box_iou, coco80_to_coco91_class, colorstr, check_dataset, check_img_size, \
-    check_requirements, check_suffix, check_yaml, increment_path, non_max_suppression, print_args, scale_coords, \
-    xyxy2xywh, xywh2xyxy, LOGGER
-from utils.metrics import ap_per_class, ConfusionMatrix
+from utils.general import (LOGGER, box_iou, check_dataset, check_img_size, check_requirements, check_yaml,
+                           coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args,
+                           scale_coords, xywh2xyxy, xyxy2xywh)
+from utils.metrics import ConfusionMatrix, ap_per_class
 from utils.plots import output_to_target, plot_images, plot_val_study
 from utils.torch_utils import select_device, time_sync
-from utils.callbacks import Callbacks
 
 
 def save_one_txt(predn, save_conf, shape, file):
@@ -100,6 +100,7 @@ def run(data,
         name='exp',  # save to project/name
         exist_ok=False,  # existing project/name ok, do not increment
         half=True,  # use FP16 half-precision inference
+        dnn=False,  # use OpenCV DNN for ONNX inference
         model=None,
         dataloader=None,
         save_dir=Path(''),
@@ -110,8 +111,10 @@ def run(data,
     # Initialize/load model and set device
     training = model is not None
     if training:  # called by train.py
-        device = next(model.parameters()).device  # get model device
+        device, pt = next(model.parameters()).device, True  # get model device, PyTorch model
 
+        half &= device.type != 'cpu'  # half precision only supported on CUDA
+        model.half() if half else model.float()
     else:  # called directly
         device = select_device(device, batch_size=batch_size)
@@ -120,22 +123,21 @@ def run(data,
         (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
 
         # Load model
-        check_suffix(weights, '.pt')
-        model = attempt_load(weights, map_location=device)  # load FP32 model
-        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
-        imgsz = check_img_size(imgsz, s=gs)  # check image size
-
-        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
-        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
-        #     model = nn.DataParallel(model)
+        model = DetectMultiBackend(weights, device=device, dnn=dnn)
+        stride, pt = model.stride, model.pt
+        imgsz = check_img_size(imgsz, s=stride)  # check image size
+        half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
+        if pt:
+            model.model.half() if half else model.model.float()
+        else:
+            half = False
+            batch_size = 1  # export.py models default to batch-size 1
+            device = torch.device('cpu')
+            LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
 
         # Data
         data = check_dataset(data)  # check
 
-        # Half
-        half &= device.type != 'cpu'  # half precision only supported on CUDA
-        model.half() if half else model.float()
-
     # Configure
     model.eval()
     is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt')  # COCO dataset
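# NOTE (illustration only, not part of the patch): the new loading path above behind one
# call; 'yolov5s.pt' is assumed to be present locally.
from models.common import DetectMultiBackend
from utils.torch_utils import select_device

device = select_device('')
model = DetectMultiBackend('yolov5s.pt', device=device, dnn=False)
print(model.stride, model.pt)  # grid stride, and whether this is a native PyTorch model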
@@ -145,11 +147,11 @@ def run(data,
     # Dataloader
     if not training:
-        if device.type != 'cpu':
-            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
+        if pt and device.type != 'cpu':
+            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
         pad = 0.0 if task == 'speed' else 0.5
         task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
-        dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=pad, rect=True,
+        dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=pt,
                                        prefix=colorstr(f'{task}: '))[0]
 
     seen = 0
@@ -161,32 +163,33 @@ def run(data,
     loss = torch.zeros(3, device=device)
     jdict, stats, ap, ap_class = [], [], [], []
     pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')  # progress bar
-    for batch_i, (img, targets, paths, shapes) in enumerate(pbar):
+    for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
         t1 = time_sync()
-        img = img.to(device, non_blocking=True)
-        img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-        targets = targets.to(device)
-        nb, _, height, width = img.shape  # batch size, channels, height, width
+        if pt:
+            im = im.to(device, non_blocking=True)
+            targets = targets.to(device)
+        im = im.half() if half else im.float()  # uint8 to fp16/32
+        im /= 255  # 0 - 255 to 0.0 - 1.0
+        nb, _, height, width = im.shape  # batch size, channels, height, width
         t2 = time_sync()
         dt[0] += t2 - t1
 
-        # Run model
-        out, train_out = model(img, augment=augment)  # inference and training outputs
+        # Inference
+        out, train_out = model(im) if training else model(im, augment=augment, val=True)  # inference, loss outputs
         dt[1] += time_sync() - t2
 
-        # Compute loss
+        # Loss
         if compute_loss:
             loss += compute_loss([x.float() for x in train_out], targets)[1]  # box, obj, cls
 
-        # Run NMS
+        # NMS
         targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
         lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
         t3 = time_sync()
         out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
         dt[2] += time_sync() - t3
 
-        # Statistics per image
+        # Metrics
         for si, pred in enumerate(out):
             labels = targets[targets[:, 0] == si, 1:]
             nl = len(labels)
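# NOTE (illustration only, not part of the patch): the per-batch preprocessing above, run on
# a fake uint8 batch.
import torch

im = torch.randint(0, 256, (16, 3, 640, 640), dtype=torch.uint8)  # fake BCHW uint8 batch
half = False
im = im.half() if half else im.float()    # uint8 to fp16/32
im /= 255                                 # 0 - 255 to 0.0 - 1.0
print(im.min().item(), im.max().item())   # values now within [0.0, 1.0]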
@@ -203,12 +206,12 @@ def run(data,
             if single_cls:
                 pred[:, 5] = 0
             predn = pred.clone()
-            scale_coords(img[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
+            scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
 
             # Evaluate
             if nl:
                 tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
-                scale_coords(img[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
+                scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
                 labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
                 correct = process_batch(predn, labelsn, iouv)
                 if plots:
@@ -222,16 +225,16 @@ def run(data,
                     save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt'))
                 if save_json:
                     save_one_json(predn, jdict, path, class_map)  # append to COCO-JSON dictionary
-            callbacks.run('on_val_image_end', pred, predn, path, names, img[si])
+            callbacks.run('on_val_image_end', pred, predn, path, names, im[si])
 
         # Plot images
         if plots and batch_i < 3:
             f = save_dir / f'val_batch{batch_i}_labels.jpg'  # labels
-            Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
+            Thread(target=plot_images, args=(im, targets, paths, f, names), daemon=True).start()
             f = save_dir / f'val_batch{batch_i}_pred.jpg'  # predictions
-            Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()
+            Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start()
 
-    # Compute statistics
+    # Compute metrics
     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
     if len(stats) and stats[0].any():
         p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
@@ -319,6 +322,7 @@ def parse_opt():
     parser.add_argument('--name', default='exp', help='save to project/name')
     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
     parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
+    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
     opt = parser.parse_args()
     opt.data = check_yaml(opt.data)  # check YAML
     opt.save_json |= opt.data.endswith('coco.yaml')
@@ -331,6 +335,8 @@ def main(opt):
     check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
 
     if opt.task in ('train', 'val', 'test'):  # run normally
+        if opt.conf_thres > 0.001:  # https://github.com/ultralytics/yolov5/issues/1466
+            LOGGER.info(f'WARNING: confidence threshold {opt.conf_thres} >> 0.001 will produce invalid mAP values.')
         run(**vars(opt))
 
     elif opt.task == 'speed':  # speed benchmarks