diff --git a/.github/ISSUE_TEMPLATE/bug-report.yml b/.github/ISSUE_TEMPLATE/bug-report.yml
index a20f15c20c93..fcb64138b088 100644
--- a/.github/ISSUE_TEMPLATE/bug-report.yml
+++ b/.github/ISSUE_TEMPLATE/bug-report.yml
@@ -41,8 +41,8 @@ body:
     attributes:
       label: Bug
       description: Provide console output with error messages and/or screenshots of the bug.
-      placeholder: >
-        TIP: Include as much information as possible (screenshots, logs, tracebacks etc.) to receive the most helpful response.
+      placeholder: |
+        💡 ProTip! Include as much information as possible (screenshots, logs, tracebacks etc.) to receive the most helpful response.
    validations:
      required: true
@@ -51,7 +51,7 @@ body:
    attributes:
      label: Environment
      description: Please specify the software and hardware you used to produce the bug.
      placeholder: |
-        - YOLO: YOLOv5 🚀 v6.0-37-g620b535 torch 1.9.0+cu111 CUDA:0 (Tesla V100-SXM2-16GB, 16160.5MB)
+        - YOLO: YOLOv5 🚀 v6.0-67-g60e42e1 torch 1.9.0+cu111 CUDA:0 (A100-SXM4-40GB, 40536MiB)
        - OS: Ubuntu 20.04
        - Python: 3.9.0
    validations:
      required: true
@@ -64,7 +64,9 @@ body:
        When asking a question, people will be better able to provide help if you provide code that they can easily understand and use to **reproduce** the problem.
        This is referred to by community members as creating a [minimal reproducible example](https://stackoverflow.com/help/minimal-reproducible-example).
      placeholder: |
-        # code to reproduce your issue here
+        ```
+        # Code to reproduce your issue here
+        ```
    validations:
      required: false
diff --git a/.github/ISSUE_TEMPLATE/question.yml b/.github/ISSUE_TEMPLATE/question.yml
index 9ae5dd57c608..8e0993c68bab 100644
--- a/.github/ISSUE_TEMPLATE/question.yml
+++ b/.github/ISSUE_TEMPLATE/question.yml
@@ -22,8 +22,8 @@ body:
    attributes:
      label: Question
      description: What is your question?
-      placeholder: >
-        TIP: Include as much information as possible (screenshots, links, reference etc.) to receive the most helpful response.
+      placeholder: |
+        💡 ProTip! Include as much information as possible (screenshots, logs, tracebacks etc.) to receive the most helpful response.
    validations:
      required: true
diff --git a/.github/workflows/code-format.yml b/.github/workflows/code-format.yml
deleted file mode 100644
index 6ebc6cc01c0b..000000000000
--- a/.github/workflows/code-format.yml
+++ /dev/null
@@ -1,47 +0,0 @@
-# Run code formatting GitHub Action, can be replaced by this bot: https://github.com/marketplace/pre-commit-ci
-
-name: Code formatting
-
-on:  # https://help.github.com/en/actions/reference/events-that-trigger-workflows
-  push:
-    branches: [master]
-  pull_request: {}
-
-jobs:
-  pep8-check-flake8:
-    runs-on: ubuntu-20.04
-    steps:
-      - uses: actions/checkout@master
-      - uses: actions/setup-python@v2
-        with:
-          python-version: 3.7
-      - name: Install dependencies
-        run: |
-          pip install flake8
-          pip list
-        shell: bash
-      - name: PEP8
-        run: |
-          flake8 .
-
-  pre-commit-check:
-    runs-on: ubuntu-20.04
-    steps:
-      - uses: actions/checkout@v2
-        # for private repo - first is the checkout step, which needs to use unlimited fetch depth for pushing
-        with:
-          fetch-depth: 0
-      - uses: actions/setup-python@v2
-
-      - name: set PY
-        run: echo "PY=$(python -VV | sha256sum | cut -d' ' -f1)" >> $GITHUB_ENV
-      - uses: actions/cache@v2
-        with:
-          path: ~/.cache/pre-commit
-          key: pre-commit|${{ env.PY }}|${{ hashFiles('.pre-commit-config.yaml') }}
-
-      - uses: pre-commit/action@v2.0.3
-        # this action also provides an additional behaviour when used in private repositories
-        # when configured with a github token, the action will push back fixes to the pull request branch
-        with:
-          token: ${{ secrets.GITHUB_TOKEN }}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 2eb78aa17ef4..48e752f448f1 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -30,12 +30,11 @@ repos:
        args: [--py36-plus]
        name: Upgrade code

-  # TODO
-  #- repo: https://github.com/PyCQA/isort
-  #  rev: 5.9.3
-  #  hooks:
-  #    - id: isort
-  #      name: imports
+  - repo: https://github.com/PyCQA/isort
+    rev: 5.9.3
+    hooks:
+      - id: isort
+        name: Sort imports

  # TODO
  #- repo: https://github.com/pre-commit/mirrors-yapf
diff --git a/Dockerfile b/Dockerfile
index 0ee89b432b8f..fe1acb0a6540 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,7 +1,7 @@
 # YOLOv5 🚀 by Ultralytics, GPL-3.0 license

 # Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
-FROM nvcr.io/nvidia/pytorch:21.05-py3
+FROM nvcr.io/nvidia/pytorch:21.10-py3

 # Install linux packages
 RUN apt update && apt install -y zip htop screen libgl1-mesa-glx
@@ -11,8 +11,8 @@ COPY requirements.txt .
 RUN python -m pip install --upgrade pip
 RUN pip uninstall -y nvidia-tensorboard nvidia-tensorboard-plugin-dlprof
 RUN pip install --no-cache -r requirements.txt coremltools onnx gsutil notebook wandb>=0.12.2
-RUN pip install --no-cache -U torch torchvision numpy
-# RUN pip install --no-cache torch==1.9.1+cu111 torchvision==0.10.1+cu111 -f https://download.pytorch.org/whl/torch_stable.html
+RUN pip install --no-cache -U torch torchvision numpy Pillow
+# RUN pip install --no-cache torch==1.10.0+cu113 torchvision==0.11.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

 # Create working directory
 RUN mkdir -p /usr/src/app
diff --git a/README.md b/README.md
index 3e2f5b656cde..6e72d85da7ee 100644
--- a/README.md
+++ b/README.md
@@ -109,11 +109,11 @@ the [latest YOLOv5 release](https://github.com/ultralytics/yolov5/releases) and
 ```bash
 $ python detect.py --source 0  # webcam
-                            file.jpg  # image
-                            file.mp4  # video
+                            img.jpg  # image
+                            vid.mp4  # video
                             path/  # directory
                             path/*.jpg  # glob
-                            'https://youtu.be/NUsoVlDFqZg'  # YouTube
+                            'https://youtu.be/Zgi9g1ksQHc'  # YouTube
                             'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
 ```
diff --git a/data/coco128.yaml b/data/coco128.yaml
index b1dfb004afa1..84a91b18359d 100644
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@@ -27,4 +27,4 @@ names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 't

 # Download script/URL (optional)
-download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
+download: https://ultralytics.com/assets/coco128.zip
diff --git a/detect.py b/detect.py
index f9c7bac3fca2..108f8f138052 100644
--- a/detect.py
+++ b/detect.py
@@ -3,7 +3,13 @@
 Run inference on images, videos, directories, streams, etc.

 Usage:
-    $ python path/to/detect.py --source path/to/img.jpg --weights yolov5s.pt --img 640
+    $ python path/to/detect.py --weights yolov5s.pt --source 0  # webcam
+                                                             img.jpg  # image
+                                                             vid.mp4  # video
+                                                             path/  # directory
+                                                             path/*.jpg  # glob
+                                                             'https://youtu.be/Zgi9g1ksQHc'  # YouTube
+                                                             'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream
 """

 import argparse
@@ -12,7 +18,6 @@
 from pathlib import Path

 import cv2
-import numpy as np
 import torch
 import torch.backends.cudnn as cudnn

@@ -22,12 +27,12 @@
 sys.path.append(str(ROOT))  # add ROOT to PATH
 ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

-from models.experimental import attempt_load
-from utils.datasets import LoadImages, LoadStreams
-from utils.general import apply_classifier, check_img_size, check_imshow, check_requirements, check_suffix, colorstr, \
-    increment_path, non_max_suppression, print_args, save_one_box, scale_coords, strip_optimizer, xyxy2xywh, LOGGER
-from utils.plots import Annotator, colors
-from utils.torch_utils import load_classifier, select_device, time_sync
+from models.common import DetectMultiBackend
+from utils.datasets import IMG_FORMATS, VID_FORMATS, LoadImages, LoadStreams
+from utils.general import (LOGGER, check_file, check_img_size, check_imshow, check_requirements, colorstr,
+                           increment_path, non_max_suppression, print_args, scale_coords, strip_optimizer, xyxy2xywh)
+from utils.plots import Annotator, colors, save_one_box
+from utils.torch_utils import select_device, time_sync


 @torch.no_grad()
@@ -59,120 +64,55 @@ def run(weights=ROOT / 'yolov5s.pt',  # model.pt path(s)
         ):
     source = str(source)
     save_img = not nosave and not source.endswith('.txt')  # save inference images
-    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
-        ('rtsp://', 'rtmp://', 'http://', 'https://'))
+    is_file = Path(source).suffix[1:] in (IMG_FORMATS + VID_FORMATS)
+    is_url = source.lower().startswith(('rtsp://', 'rtmp://', 'http://', 'https://'))
+    webcam = source.isnumeric() or source.endswith('.txt') or (is_url and not is_file)
+    if is_url and is_file:
+        source = check_file(source)  # download

     # Directories
     save_dir = increment_path(Path(project) / name, exist_ok=exist_ok)  # increment run
     (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir

-    # Initialize
+    # Load model
     device = select_device(device)
-    half &= device.type != 'cpu'  # half precision only supported on CUDA
+    model = DetectMultiBackend(weights, device=device, dnn=dnn)
+    stride, names, pt, jit, onnx = model.stride, model.names, model.pt, model.jit, model.onnx
+    imgsz = check_img_size(imgsz, s=stride)  # check image size

-    # Load model
-    w = str(weights[0] if isinstance(weights, list) else weights)
-    classify, suffix, suffixes = False, Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '']
-    check_suffix(w, suffixes)  # check weights have acceptable suffix
-    pt, onnx, tflite, pb, saved_model = (suffix == x for x in suffixes)  # backend booleans
-    stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
+    # Half
+    half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
     if pt:
-        model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
-        stride = int(model.stride.max())  # model stride
-        names = model.module.names if hasattr(model, 'module') else model.names  # get class names
-        if half:
-            model.half()  # to FP16
-        if classify:  # second-stage classifier
-            modelc = load_classifier(name='resnet50', n=2)  # initialize
-            modelc.load_state_dict(torch.load('resnet50.pt', map_location=device)['model']).to(device).eval()
-    elif onnx:
-        if dnn:
-            check_requirements(('opencv-python>=4.5.4',))
-            net = cv2.dnn.readNetFromONNX(w)
-        else:
-            check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
-            import onnxruntime
-            session = onnxruntime.InferenceSession(w, None)
-    else:  # TensorFlow models
-        import tensorflow as tf
-        if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
-            def wrap_frozen_graph(gd, inputs, outputs):
-                x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped import
-                return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
-                               tf.nest.map_structure(x.graph.as_graph_element, outputs))
-
-            graph_def = tf.Graph().as_graph_def()
-            graph_def.ParseFromString(open(w, 'rb').read())
-            frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
-        elif saved_model:
-            model = tf.keras.models.load_model(w)
-        elif tflite:
-            interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
-            interpreter.allocate_tensors()  # allocate
-            input_details = interpreter.get_input_details()  # inputs
-            output_details = interpreter.get_output_details()  # outputs
-            int8 = input_details[0]['dtype'] == np.uint8  # is TFLite quantized uint8 model
-    imgsz = check_img_size(imgsz, s=stride)  # check image size
+        model.model.half() if half else model.model.float()

     # Dataloader
     if webcam:
         view_img = check_imshow()
         cudnn.benchmark = True  # set True to speed up constant image size inference
-        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadStreams(source, img_size=imgsz, stride=stride, auto=pt and not jit)
         bs = len(dataset)  # batch_size
     else:
-        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt)
+        dataset = LoadImages(source, img_size=imgsz, stride=stride, auto=pt and not jit)
         bs = 1  # batch_size
     vid_path, vid_writer = [None] * bs, [None] * bs

     # Run inference
     if pt and device.type != 'cpu':
-        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.parameters())))  # run once
+        model(torch.zeros(1, 3, *imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
     dt, seen = [0.0, 0.0, 0.0], 0
-    for path, img, im0s, vid_cap, s in dataset:
+    for path, im, im0s, vid_cap, s in dataset:
         t1 = time_sync()
-        if onnx:
-            img = img.astype('float32')
-        else:
-            img = torch.from_numpy(img).to(device)
-            img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-        if len(img.shape) == 3:
-            img = img[None]  # expand for batch dim
+        im = torch.from_numpy(im).to(device)
+        im = im.half() if half else im.float()  # uint8 to fp16/32
+        im /= 255  # 0 - 255 to 0.0 - 1.0
+        if len(im.shape) == 3:
+            im = im[None]  # expand for batch dim
         t2 = time_sync()
         dt[0] += t2 - t1

         # Inference
-        if pt:
-            visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
-            pred = model(img, augment=augment, visualize=visualize)[0]
-        elif onnx:
-            if dnn:
-                net.setInput(img)
-                pred = torch.tensor(net.forward())
-            else:
-                pred = torch.tensor(session.run([session.get_outputs()[0].name], {session.get_inputs()[0].name: img}))
-        else:  # tensorflow model (tflite, pb, saved_model)
-            imn = img.permute(0, 2, 3, 1).cpu().numpy()  # image in numpy
-            if pb:
-                pred = frozen_func(x=tf.constant(imn)).numpy()
-            elif saved_model:
-                pred = model(imn, training=False).numpy()
-            elif tflite:
-                if int8:
-                    scale, zero_point = input_details[0]['quantization']
-                    imn = (imn / scale + zero_point).astype(np.uint8)  # de-scale
-                interpreter.set_tensor(input_details[0]['index'], imn)
-                interpreter.invoke()
-                pred = interpreter.get_tensor(output_details[0]['index'])
-                if int8:
-                    scale, zero_point = output_details[0]['quantization']
-                    pred = (pred.astype(np.float32) - zero_point) * scale  # re-scale
-            pred[..., 0] *= imgsz[1]  # x
-            pred[..., 1] *= imgsz[0]  # y
-            pred[..., 2] *= imgsz[1]  # w
-            pred[..., 3] *= imgsz[0]  # h
-            pred = torch.tensor(pred)
+        visualize = increment_path(save_dir / Path(path).stem, mkdir=True) if visualize else False
+        pred = model(im, augment=augment, visualize=visualize)
         t3 = time_sync()
         dt[1] += t3 - t2

@@ -181,8 +121,7 @@ def wrap_frozen_graph(gd, inputs, outputs):
         dt[2] += time_sync() - t3

         # Second-stage classifier (optional)
-        if classify:
-            pred = apply_classifier(pred, modelc, img, im0s)
+        # pred = utils.general.apply_classifier(pred, classifier_model, im, im0s)

         # Process predictions
         for i, det in enumerate(pred):  # per image
@@ -194,15 +133,15 @@ def wrap_frozen_graph(gd, inputs, outputs):
                 p, im0, frame = path, im0s.copy(), getattr(dataset, 'frame', 0)

             p = Path(p)  # to Path
-            save_path = str(save_dir / p.name)  # img.jpg
-            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # img.txt
-            s += '%gx%g ' % img.shape[2:]  # print string
+            save_path = str(save_dir / p.name)  # im.jpg
+            txt_path = str(save_dir / 'labels' / p.stem) + ('' if dataset.mode == 'image' else f'_{frame}')  # im.txt
+            s += '%gx%g ' % im.shape[2:]  # print string
             gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
             imc = im0.copy() if save_crop else im0  # for save_crop
             annotator = Annotator(im0, line_width=line_thickness, example=str(names))
             if len(det):
                 # Rescale boxes from img_size to im0 size
-                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
+                det[:, :4] = scale_coords(im.shape[2:], det[:, :4], im0.shape).round()

                 # Print results
                 for c in det[:, -1].unique():
diff --git a/export.py b/export.py
index 47dbcab50144..4cf30e34fc7b 100644
--- a/export.py
+++ b/export.py
@@ -21,6 +21,7 @@
 """

 import argparse
+import json
 import os
 import subprocess
 import sys
@@ -42,8 +43,8 @@
 from models.yolo import Detect
 from utils.activations import SiLU
 from utils.datasets import LoadImages
-from utils.general import check_dataset, check_img_size, check_requirements, colorstr, file_size, print_args, \
-    url2file, LOGGER
+from utils.general import (LOGGER, check_dataset, check_img_size, check_requirements, colorstr, file_size, print_args,
+                           url2file)
 from utils.torch_utils import select_device


@@ -54,7 +55,9 @@ def export_torchscript(model, im, file, optimize, prefix=colorstr('TorchScript:'
         f = file.with_suffix('.torchscript.pt')

         ts = torch.jit.trace(model, im, strict=False)
-        (optimize_for_mobile(ts) if optimize else ts).save(f)
+        d = {"shape": im.shape, "stride": int(max(model.stride)), "names": model.names}
+        extra_files = {'config.txt': json.dumps(d)}  # torch._C.ExtraFilesMap()
+        (optimize_for_mobile(ts) if optimize else ts).save(f, _extra_files=extra_files)

         LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
     except Exception as e:
@@ -117,7 +120,7 @@ def export_coreml(model, im, file, prefix=colorstr('CoreML:')):

         model.train()  # CoreML exports should be placed in model.train() mode
         ts = torch.jit.trace(model, im, strict=False)  # TorchScript model
-        ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255.0, bias=[0, 0, 0])])
+        ct_model = ct.convert(ts, inputs=[ct.ImageType('image', shape=im.shape, scale=1 / 255, bias=[0, 0, 0])])
         ct_model.save(f)

         LOGGER.info(f'{prefix} export success, saved as {f} ({file_size(f):.1f} MB)')
@@ -135,7 +138,8 @@ def export_saved_model(model, im, file, dynamic,
     try:
         import tensorflow as tf
         from tensorflow import keras
-        from models.tf import TFModel, TFDetect
+
+        from models.tf import TFDetect, TFModel

         LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
         f = str(file).replace('.pt', '_saved_model')
@@ -182,6 +186,7 @@ def export_tflite(keras_model, im, file, int8, data, ncalib, prefix=colorstr('Te
     # YOLOv5 TensorFlow Lite export
     try:
         import tensorflow as tf
+
         from models.tf import representative_dataset_gen

         LOGGER.info(f'\n{prefix} starting export with tensorflow {tf.__version__}...')
@@ -215,6 +220,7 @@ def export_tfjs(keras_model, im, file, prefix=colorstr('TensorFlow.js:')):
     try:
         check_requirements(('tensorflowjs',))
         import re
+
         import tensorflowjs as tfjs

         LOGGER.info(f'\n{prefix} starting export with tensorflowjs {tfjs.__version__}...')
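The export_torchscript() change above bakes model metadata (input shape, stride, class names) into the .torchscript.pt artifact through TorchScript's `_extra_files` mechanism. A minimal round-trip sketch, using a toy stand-in module and made-up metadata values rather than a real YOLOv5 checkpoint:

```python
import json

import torch
import torch.nn as nn

net = nn.Sequential(nn.Conv2d(3, 8, 3), nn.SiLU())  # toy stand-in, not a YOLOv5 model
ts = torch.jit.trace(net, torch.zeros(1, 3, 64, 64), strict=False)
d = {'shape': [1, 3, 64, 64], 'stride': 32, 'names': ['person', 'car']}  # illustrative metadata
ts.save('toy.torchscript.pt', _extra_files={'config.txt': json.dumps(d)})  # embed metadata in the file

extra_files = {'config.txt': ''}  # dict values are populated in place on load
reloaded = torch.jit.load('toy.torchscript.pt', _extra_files=extra_files)
meta = json.loads(extra_files['config.txt'])
print(meta['stride'], meta['names'])  # 32 ['person', 'car']
```

DetectMultiBackend, added to models/common.py below, reads the same 'config.txt' entry back at load time, so TorchScript weights no longer need a Python-side checkpoint to recover stride and names.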
diff --git a/hubconf.py b/hubconf.py
index a697e033b09b..3488fef76ac5 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -27,10 +27,10 @@ def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbo
     """
     from pathlib import Path

-    from models.yolo import Model
     from models.experimental import attempt_load
-    from utils.general import check_requirements, set_logging
+    from models.yolo import Model
     from utils.downloads import attempt_download
+    from utils.general import check_requirements, intersect_dicts, set_logging
     from utils.torch_utils import select_device

     file = Path(__file__).resolve()
@@ -49,9 +49,8 @@ def _create(name, pretrained=True, channels=3, classes=80, autoshape=True, verbo
             model = Model(cfg, channels, classes)  # create model
             if pretrained:
                 ckpt = torch.load(attempt_download(path), map_location=device)  # load
-                msd = model.state_dict()  # model state_dict
                 csd = ckpt['model'].float().state_dict()  # checkpoint state_dict as FP32
-                csd = {k: v for k, v in csd.items() if msd[k].shape == v.shape}  # filter
+                csd = intersect_dicts(csd, model.state_dict(), exclude=['anchors'])  # intersect
                 model.load_state_dict(csd, strict=False)  # load
                 if len(ckpt['model'].names) == classes:
                     model.names = ckpt['model'].names  # set class names attribute
@@ -125,10 +124,11 @@ def yolov5x6(pretrained=True, channels=3, classes=80, autoshape=True, verbose=Tr
     # model = custom(path='path/to/model.pt')  # custom

     # Verify inference
+    from pathlib import Path
+
     import cv2
     import numpy as np
     from PIL import Image
-    from pathlib import Path

     imgs = ['data/images/zidane.jpg',  # filename
             Path('data/images/zidane.jpg'),  # Path
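hubconf.py now delegates checkpoint filtering to intersect_dicts(), the helper added to utils/general.py later in this diff. A self-contained sketch of the behaviour, with the helper copied from that hunk and two toy modules standing in for a pretrained checkpoint and a custom model:

```python
import torch.nn as nn


def intersect_dicts(da, db, exclude=()):  # copied from the utils/general.py hunk below
    # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
    return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}


ckpt = nn.Sequential(nn.Linear(10, 10), nn.Linear(10, 80))  # stands in for an 80-class checkpoint
model = nn.Sequential(nn.Linear(10, 10), nn.Linear(10, 20))  # stands in for a 20-class custom model
csd = intersect_dicts(ckpt.state_dict(), model.state_dict(), exclude=['anchors'])
model.load_state_dict(csd, strict=False)  # only the shape-compatible first layer transfers
print(sorted(csd))  # ['0.bias', '0.weight']
```

Compared with the deleted dict comprehension, the helper also tolerates checkpoint keys that are absent from the model and can skip anchors, which is why train.py imports it as well.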
diff --git a/models/common.py b/models/common.py
index 8b70a6fea595..3ea7ba5477a6 100644
--- a/models/common.py
+++ b/models/common.py
@@ -3,12 +3,14 @@
 Common modules
 """

-import logging
+import json
 import math
+import platform
 import warnings
 from copy import copy
 from pathlib import Path

+import cv2
 import numpy as np
 import pandas as pd
 import requests
@@ -18,13 +20,11 @@
 from torch.cuda import amp

 from utils.datasets import exif_transpose, letterbox
-from utils.general import colorstr, increment_path, make_divisible, non_max_suppression, save_one_box, \
-    scale_coords, xyxy2xywh
-from utils.plots import Annotator, colors
+from utils.general import (LOGGER, check_requirements, check_suffix, colorstr, increment_path, make_divisible,
+                           non_max_suppression, scale_coords, xywh2xyxy, xyxy2xywh)
+from utils.plots import Annotator, colors, save_one_box
 from utils.torch_utils import time_sync

-LOGGER = logging.getLogger(__name__)
-

 def autopad(k, p=None):  # kernel, padding
     # Pad to 'same'
@@ -273,6 +273,128 @@ def forward(self, x):
         return torch.cat(x, self.d)


+class DetectMultiBackend(nn.Module):
+    # YOLOv5 MultiBackend class for python inference on various backends
+    def __init__(self, weights='yolov5s.pt', device=None, dnn=True):
+        # Usage:
+        #   PyTorch:         weights = *.pt
+        #   TorchScript:               *.torchscript.pt
+        #   CoreML:                    *.mlmodel
+        #   TensorFlow:                *_saved_model
+        #   TensorFlow:                *.pb
+        #   TensorFlow Lite:           *.tflite
+        #   ONNX Runtime:              *.onnx
+        #   OpenCV DNN:                *.onnx with dnn=True
+        super().__init__()
+        w = str(weights[0] if isinstance(weights, list) else weights)
+        suffix, suffixes = Path(w).suffix.lower(), ['.pt', '.onnx', '.tflite', '.pb', '', '.mlmodel']
+        check_suffix(w, suffixes)  # check weights have acceptable suffix
+        pt, onnx, tflite, pb, saved_model, coreml = (suffix == x for x in suffixes)  # backend booleans
+        jit = pt and 'torchscript' in w.lower()
+        stride, names = 64, [f'class{i}' for i in range(1000)]  # assign defaults
+
+        if jit:  # TorchScript
+            LOGGER.info(f'Loading {w} for TorchScript inference...')
+            extra_files = {'config.txt': ''}  # model metadata
+            model = torch.jit.load(w, _extra_files=extra_files)
+            if extra_files['config.txt']:
+                d = json.loads(extra_files['config.txt'])  # extra_files dict
+                stride, names = int(d['stride']), d['names']
+        elif pt:  # PyTorch
+            from models.experimental import attempt_load  # scoped to avoid circular import
+            model = torch.jit.load(w) if 'torchscript' in w else attempt_load(weights, map_location=device)
+            stride = int(model.stride.max())  # model stride
+            names = model.module.names if hasattr(model, 'module') else model.names  # get class names
+        elif coreml:  # CoreML *.mlmodel
+            import coremltools as ct
+            model = ct.models.MLModel(w)
+        elif dnn:  # ONNX OpenCV DNN
+            LOGGER.info(f'Loading {w} for ONNX OpenCV DNN inference...')
+            check_requirements(('opencv-python>=4.5.4',))
+            net = cv2.dnn.readNetFromONNX(w)
+        elif onnx:  # ONNX Runtime
+            LOGGER.info(f'Loading {w} for ONNX Runtime inference...')
+            check_requirements(('onnx', 'onnxruntime-gpu' if torch.has_cuda else 'onnxruntime'))
+            import onnxruntime
+            session = onnxruntime.InferenceSession(w, None)
+        else:  # TensorFlow model (TFLite, pb, saved_model)
+            import tensorflow as tf
+            if pb:  # https://www.tensorflow.org/guide/migrate#a_graphpb_or_graphpbtxt
+                def wrap_frozen_graph(gd, inputs, outputs):
+                    x = tf.compat.v1.wrap_function(lambda: tf.compat.v1.import_graph_def(gd, name=""), [])  # wrapped
+                    return x.prune(tf.nest.map_structure(x.graph.as_graph_element, inputs),
+                                   tf.nest.map_structure(x.graph.as_graph_element, outputs))
+
+                LOGGER.info(f'Loading {w} for TensorFlow *.pb inference...')
+                graph_def = tf.Graph().as_graph_def()
+                graph_def.ParseFromString(open(w, 'rb').read())
+                frozen_func = wrap_frozen_graph(gd=graph_def, inputs="x:0", outputs="Identity:0")
+            elif saved_model:
+                LOGGER.info(f'Loading {w} for TensorFlow saved_model inference...')
+                model = tf.keras.models.load_model(w)
+            elif tflite:  # https://www.tensorflow.org/lite/guide/python#install_tensorflow_lite_for_python
+                if 'edgetpu' in w.lower():
+                    LOGGER.info(f'Loading {w} for TensorFlow Edge TPU inference...')
+                    import tflite_runtime.interpreter as tfli
+                    delegate = {'Linux': 'libedgetpu.so.1',  # install https://coral.ai/software/#edgetpu-runtime
+                                'Darwin': 'libedgetpu.1.dylib',
+                                'Windows': 'edgetpu.dll'}[platform.system()]
+                    interpreter = tfli.Interpreter(model_path=w, experimental_delegates=[tfli.load_delegate(delegate)])
+                else:
+                    LOGGER.info(f'Loading {w} for TensorFlow Lite inference...')
+                    interpreter = tf.lite.Interpreter(model_path=w)  # load TFLite model
+                interpreter.allocate_tensors()  # allocate
+                input_details = interpreter.get_input_details()  # inputs
+                output_details = interpreter.get_output_details()  # outputs
+        self.__dict__.update(locals())  # assign all variables to self
+
+    def forward(self, im, augment=False, visualize=False, val=False):
+        # YOLOv5 MultiBackend inference
+        b, ch, h, w = im.shape  # batch, channel, height, width
+        if self.pt:  # PyTorch
+            y = self.model(im) if self.jit else self.model(im, augment=augment, visualize=visualize)
+            return y if val else y[0]
+        elif self.coreml:  # CoreML *.mlmodel
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
+            im = Image.fromarray((im[0] * 255).astype('uint8'))
+            # im = im.resize((192, 320), Image.ANTIALIAS)
+            y = self.model.predict({'image': im})  # coordinates are xywh normalized
+            box = xywh2xyxy(y['coordinates'] * [[w, h, w, h]])  # xyxy pixels
+            conf, cls = y['confidence'].max(1), y['confidence'].argmax(1).astype(np.float)
+            y = np.concatenate((box, conf.reshape(-1, 1), cls.reshape(-1, 1)), 1)
+        elif self.onnx:  # ONNX
+            im = im.cpu().numpy()  # torch to numpy
+            if self.dnn:  # ONNX OpenCV DNN
+                self.net.setInput(im)
+                y = self.net.forward()
+            else:  # ONNX Runtime
+                y = self.session.run([self.session.get_outputs()[0].name], {self.session.get_inputs()[0].name: im})[0]
+        else:  # TensorFlow model (TFLite, pb, saved_model)
+            im = im.permute(0, 2, 3, 1).cpu().numpy()  # torch BCHW to numpy BHWC shape(1,320,192,3)
+            if self.pb:
+                y = self.frozen_func(x=self.tf.constant(im)).numpy()
+            elif self.saved_model:
+                y = self.model(im, training=False).numpy()
+            elif self.tflite:
+                input, output = self.input_details[0], self.output_details[0]
+                int8 = input['dtype'] == np.uint8  # is TFLite quantized uint8 model
+                if int8:
+                    scale, zero_point = input['quantization']
+                    im = (im / scale + zero_point).astype(np.uint8)  # de-scale
+                self.interpreter.set_tensor(input['index'], im)
+                self.interpreter.invoke()
+                y = self.interpreter.get_tensor(output['index'])
+                if int8:
+                    scale, zero_point = output['quantization']
+                    y = (y.astype(np.float32) - zero_point) * scale  # re-scale
+            y[..., 0] *= w  # x
+            y[..., 1] *= h  # y
+            y[..., 2] *= w  # w
+            y[..., 3] *= h  # h
+        y = torch.tensor(y)
+        return (y, []) if val else y
+
+
 class AutoShape(nn.Module):
     # YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
     conf = 0.25  # NMS confidence threshold
@@ -339,7 +461,7 @@ def forward(self, imgs, size=640, augment=False, profile=False):
             x = [letterbox(im, new_shape=shape1, auto=False)[0] for im in imgs]  # pad
             x = np.stack(x, 0) if n > 1 else x[0][None]  # stack
             x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
-            x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
+            x = torch.from_numpy(x).to(p.device).type_as(p) / 255  # uint8 to fp16/32

         t.append(time_sync())
         with amp.autocast(enabled=p.device.type != 'cpu'):
@@ -362,7 +484,7 @@ class Detections:
     def __init__(self, imgs, pred, files, times=None, names=None, shape=None):
         super().__init__()
         d = pred[0].device  # device
-        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1., 1.], device=d) for im in imgs]  # normalizations
+        gn = [torch.tensor([*(im.shape[i] for i in [1, 0, 1, 0]), 1, 1], device=d) for im in imgs]  # normalizations
         self.imgs = imgs  # list of images as numpy arrays
         self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
         self.names = names  # class names
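Taken together with the detect.py changes above, DetectMultiBackend reduces inference to one backend-independent calling pattern. A hedged sketch of that pattern, assuming a local yolov5s.pt, the bundled data/images/zidane.jpg, and the repository root on PYTHONPATH; it mirrors the updated detect.py rather than introducing new behaviour:

```python
import torch

from models.common import DetectMultiBackend
from utils.datasets import LoadImages
from utils.general import check_img_size, non_max_suppression

device = torch.device('cpu')
model = DetectMultiBackend('yolov5s.pt', device=device, dnn=False)  # backend picked by file suffix
imgsz = check_img_size([640, 640], s=model.stride)  # enforce stride-multiple image size

dataset = LoadImages('data/images/zidane.jpg', img_size=imgsz, stride=model.stride, auto=model.pt)
for path, im, im0s, vid_cap, s in dataset:
    im = torch.from_numpy(im).to(device).float() / 255  # uint8 to fp32, 0-255 to 0.0-1.0
    if len(im.shape) == 3:
        im = im[None]  # add batch dimension
    pred = model(im)  # identical call for PyTorch, TorchScript, ONNX, CoreML or TF weights
    det = non_max_suppression(pred, conf_thres=0.25, iou_thres=0.45)[0]
    print(path, det.shape)  # (n, 6) detections: xyxy, confidence, class
```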
diff --git a/models/experimental.py b/models/experimental.py
index 2e92ccb36faf..463e5514a06e 100644
--- a/models/experimental.py
+++ b/models/experimental.py
@@ -3,6 +3,7 @@
 Experimental modules
 """
 import math
+
 import numpy as np
 import torch
 import torch.nn as nn
@@ -32,7 +33,7 @@ def __init__(self, n, weight=False):  # n: number of inputs
         self.weight = weight  # apply weights boolean
         self.iter = range(n - 1)  # iter object
         if weight:
-            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights
+            self.w = nn.Parameter(-torch.arange(1.0, n) / 2, requires_grad=True)  # layer weights

     def forward(self, x):
         y = x[0]  # no weight
diff --git a/models/hub/yolov5-bifpn.yaml b/models/hub/yolov5-bifpn.yaml
index 2f2c82c70122..504815f5cfa0 100644
--- a/models/hub/yolov5-bifpn.yaml
+++ b/models/hub/yolov5-bifpn.yaml
@@ -9,22 +9,22 @@ anchors:
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3, [1024, False]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

-# YOLOv5 BiFPN head
+# YOLOv5 v6.0 BiFPN head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
@@ -37,7 +37,7 @@ head:
   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
-   [[-1, 14, 6], 1, Concat, [1]],  # cat P4
+   [[-1, 14, 6], 1, Concat, [1]],  # cat P4 <--- BiFPN change
   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
diff --git a/models/hub/yolov5-fpn.yaml b/models/hub/yolov5-fpn.yaml
index 707b2136cee1..a23e9c6fbf9f 100644
--- a/models/hub/yolov5-fpn.yaml
+++ b/models/hub/yolov5-fpn.yaml
@@ -9,34 +9,34 @@ anchors:
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
-   [-1, 3, Bottleneck, [128]],
+   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, BottleneckCSP, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
-   [-1, 9, BottleneckCSP, [512]],
+   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 6, BottleneckCSP, [1024]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

-# YOLOv5 FPN head
+# YOLOv5 v6.0 FPN head
head:
-  [[-1, 3, BottleneckCSP, [1024, False]],  # 10 (P5/32-large)
+  [[-1, 3, C3, [1024, False]],  # 10 (P5/32-large)

   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
   [-1, 1, Conv, [512, 1, 1]],
-   [-1, 3, BottleneckCSP, [512, False]],  # 14 (P4/16-medium)
+   [-1, 3, C3, [512, False]],  # 14 (P4/16-medium)

   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
   [-1, 1, Conv, [256, 1, 1]],
-   [-1, 3, BottleneckCSP, [256, False]],  # 18 (P3/8-small)
+   [-1, 3, C3, [256, False]],  # 18 (P3/8-small)

   [[18, 14, 10], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
diff --git a/models/hub/yolov5-p2.yaml b/models/hub/yolov5-p2.yaml
index 759e9f92fb29..ffe26ebad182 100644
--- a/models/hub/yolov5-p2.yaml
+++ b/models/hub/yolov5-p2.yaml
@@ -4,24 +4,24 @@
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
-anchors: 3
+anchors: 3  # auto-anchor evolves 3 anchors per P output layer

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3, [1024, False]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

-# YOLOv5 head
+# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
diff --git a/models/hub/yolov5-p6.yaml b/models/hub/yolov5-p6.yaml
index 85e142539ce3..28f3e439cccd 100644
--- a/models/hub/yolov5-p6.yaml
+++ b/models/hub/yolov5-p6.yaml
@@ -4,26 +4,26 @@
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
-anchors: 3
+anchors: 3  # auto-anchor 3 anchors per P output layer

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
   [-1, 3, C3, [768]],
   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
-   [-1, 1, SPP, [1024, [3, 5, 7]]],
-   [-1, 3, C3, [1024, False]],  # 11
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 11
  ]

-# YOLOv5 head
+# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [768, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
@@ -50,7 +50,7 @@ head:

   [-1, 1, Conv, [768, 3, 2]],
   [[-1, 12], 1, Concat, [1]],  # cat head P6
-   [-1, 3, C3, [1024, False]],  # 32 (P5/64-xlarge)
+   [-1, 3, C3, [1024, False]],  # 32 (P6/64-xlarge)

   [[23, 26, 29, 32], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5, P6)
  ]
diff --git a/models/hub/yolov5-p7.yaml b/models/hub/yolov5-p7.yaml
index 88a7a95cbbd1..bd2f5845f884 100644
--- a/models/hub/yolov5-p7.yaml
+++ b/models/hub/yolov5-p7.yaml
@@ -4,16 +4,16 @@
nc: 80  # number of classes
depth_multiple: 1.0  # model depth multiple
width_multiple: 1.0  # layer channel multiple
-anchors: 3
+anchors: 3  # auto-anchor 3 anchors per P output layer

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [768, 3, 2]],  # 7-P5/32
@@ -21,8 +21,8 @@ backbone:
   [-1, 1, Conv, [1024, 3, 2]],  # 9-P6/64
   [-1, 3, C3, [1024]],
   [-1, 1, Conv, [1280, 3, 2]],  # 11-P7/128
-   [-1, 1, SPP, [1280, [3, 5]]],
-   [-1, 3, C3, [1280, False]],  # 13
+   [-1, 3, C3, [1280]],
+   [-1, 1, SPPF, [1280, 5]],  # 13
  ]

# YOLOv5 head
diff --git a/models/hub/yolov5-panet.yaml b/models/hub/yolov5-panet.yaml
index 76b9b7e74e33..ccfbf900691c 100644
--- a/models/hub/yolov5-panet.yaml
+++ b/models/hub/yolov5-panet.yaml
@@ -9,40 +9,40 @@ anchors:
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
-   [-1, 3, BottleneckCSP, [128]],
+   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, BottleneckCSP, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
-   [-1, 9, BottleneckCSP, [512]],
+   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, BottleneckCSP, [1024, False]],  # 9
+   [-1, 3, C3, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

-# YOLOv5 PANet head
+# YOLOv5 v6.0 PANet head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 6], 1, Concat, [1]],  # cat backbone P4
-   [-1, 3, BottleneckCSP, [512, False]],  # 13
+   [-1, 3, C3, [512, False]],  # 13

   [-1, 1, Conv, [256, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
   [[-1, 4], 1, Concat, [1]],  # cat backbone P3
-   [-1, 3, BottleneckCSP, [256, False]],  # 17 (P3/8-small)
+   [-1, 3, C3, [256, False]],  # 17 (P3/8-small)

   [-1, 1, Conv, [256, 3, 2]],
   [[-1, 14], 1, Concat, [1]],  # cat head P4
-   [-1, 3, BottleneckCSP, [512, False]],  # 20 (P4/16-medium)
+   [-1, 3, C3, [512, False]],  # 20 (P4/16-medium)

   [-1, 1, Conv, [512, 3, 2]],
   [[-1, 10], 1, Concat, [1]],  # cat head P5
-   [-1, 3, BottleneckCSP, [1024, False]],  # 23 (P5/32-large)
+   [-1, 3, C3, [1024, False]],  # 23 (P5/32-large)

   [[17, 20, 23], 1, Detect, [nc, anchors]],  # Detect(P3, P4, P5)
  ]
diff --git a/models/hub/yolov5s-ghost.yaml b/models/hub/yolov5s-ghost.yaml
index dbf2c8e03489..ff9519c3f1aa 100644
--- a/models/hub/yolov5s-ghost.yaml
+++ b/models/hub/yolov5s-ghost.yaml
@@ -9,22 +9,22 @@ anchors:
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, GhostConv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3Ghost, [128]],
   [-1, 1, GhostConv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3Ghost, [256]],
+   [-1, 6, C3Ghost, [256]],
   [-1, 1, GhostConv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3Ghost, [512]],
   [-1, 1, GhostConv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3Ghost, [1024, False]],  # 9
+   [-1, 3, C3Ghost, [1024]],
+   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

-# YOLOv5 head
+# YOLOv5 v6.0 head
head:
  [[-1, 1, GhostConv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
diff --git a/models/hub/yolov5s-transformer.yaml b/models/hub/yolov5s-transformer.yaml
index aeac1acb0582..100d7c447527 100644
--- a/models/hub/yolov5s-transformer.yaml
+++ b/models/hub/yolov5s-transformer.yaml
@@ -9,22 +9,22 @@ anchors:
  - [30,61, 62,45, 59,119]  # P4/16
  - [116,90, 156,198, 373,326]  # P5/32

-# YOLOv5 backbone
+# YOLOv5 v6.0 backbone
backbone:
  # [from, number, module, args]
-  [[-1, 1, Focus, [64, 3]],  # 0-P1/2
+  [[-1, 1, Conv, [64, 6, 2, 2]],  # 0-P1/2
   [-1, 1, Conv, [128, 3, 2]],  # 1-P2/4
   [-1, 3, C3, [128]],
   [-1, 1, Conv, [256, 3, 2]],  # 3-P3/8
-   [-1, 9, C3, [256]],
+   [-1, 6, C3, [256]],
   [-1, 1, Conv, [512, 3, 2]],  # 5-P4/16
   [-1, 9, C3, [512]],
   [-1, 1, Conv, [1024, 3, 2]],  # 7-P5/32
-   [-1, 1, SPP, [1024, [5, 9, 13]]],
-   [-1, 3, C3TR, [1024, False]],  # 9 <-------- C3TR() Transformer module
+   [-1, 3, C3TR, [1024]],  # 9 <--- C3TR() Transformer module
+   [-1, 1, SPPF, [1024, 5]],  # 9
  ]

-# YOLOv5 head
+# YOLOv5 v6.0 head
head:
  [[-1, 1, Conv, [512, 1, 1]],
   [-1, 1, nn.Upsample, [None, 2, 'nearest']],
diff --git a/models/tf.py b/models/tf.py
index 6c07410e03a5..6de0245cfe50 100644
--- a/models/tf.py
+++ b/models/tf.py
@@ -28,11 +28,11 @@
 import torch.nn as nn
 from tensorflow import keras

-from models.common import Bottleneck, BottleneckCSP, Concat, Conv, C3, DWConv, Focus, SPP, SPPF, autopad
+from models.common import C3, SPP, SPPF, Bottleneck, BottleneckCSP, Concat, Conv, DWConv, Focus, autopad
 from models.experimental import CrossConv, MixConv2d, attempt_load
 from models.yolo import Detect
-from utils.general import make_divisible, print_args, LOGGER
 from utils.activations import SiLU
+from utils.general import LOGGER, make_divisible, print_args


 class TFBN(keras.layers.Layer):
@@ -98,7 +98,7 @@ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
         self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)

     def call(self, inputs):  # x(b,w,h,c) -> y(b,w/2,h/2,4c)
-        # inputs = inputs / 255.  # normalize 0-255 to 0-1
+        # inputs = inputs / 255  # normalize 0-255 to 0-1
         return self.conv(tf.concat([inputs[:, ::2, ::2, :],
                                     inputs[:, 1::2, ::2, :],
                                     inputs[:, ::2, 1::2, :],
@@ -227,7 +227,7 @@ def call(self, inputs):

         if not self.training:  # inference
             y = tf.sigmoid(x[i])
-            xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
+            xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
             wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]
             # Normalize xywh to 0-1 to reduce calibration error
             xy /= tf.constant([[self.imgsz[1], self.imgsz[0]]], dtype=tf.float32)
@@ -414,7 +414,7 @@ def representative_dataset_gen(dataset, ncalib=100):
     for n, (path, img, im0s, vid_cap, string) in enumerate(dataset):
         input = np.transpose(img, [1, 2, 0])
         input = np.expand_dims(input, axis=0).astype(np.float32)
-        input /= 255.0
+        input /= 255
         yield [input]
         if n >= ncalib:
             break
diff --git a/models/yolo.py b/models/yolo.py
index 38a17d9e7ba4..305f0ca0cc88 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -20,10 +20,10 @@
 from models.common import *
 from models.experimental import *
 from utils.autoanchor import check_anchor_order
-from utils.general import check_version, check_yaml, make_divisible, print_args, LOGGER
+from utils.general import LOGGER, check_version, check_yaml, make_divisible, print_args
 from utils.plots import feature_visualization
-from utils.torch_utils import copy_attr, fuse_conv_and_bn, initialize_weights, model_info, scale_img, \
-    select_device, time_sync
+from utils.torch_utils import (copy_attr, fuse_conv_and_bn, initialize_weights, model_info, scale_img, select_device,
+                               time_sync)

 try:
     import thop  # for FLOPs computation
@@ -55,15 +55,15 @@ def forward(self, x):
             x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()

             if not self.training:  # inference
-                if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic:
+                if self.onnx_dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]:
                     self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i)

                 y = x[i].sigmoid()
                 if self.inplace:
-                    y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                    y[..., 0:2] = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                     y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                 else:  # for YOLOv5 on AWS Inferentia https://github.com/ultralytics/yolov5/pull/2953
-                    xy = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i]  # xy
+                    xy = (y[..., 0:2] * 2 - 0.5 + self.grid[i]) * self.stride[i]  # xy
                     wh = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i]  # wh
                     y = torch.cat((xy, wh, y[..., 4:]), -1)
                 z.append(y.view(bs, -1, self.no))
@@ -90,7 +90,7 @@ def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None, anchors=None):  # model, i
         else:  # is *.yaml
             import yaml  # for torch hub
             self.yaml_file = Path(cfg).name
-            with open(cfg, errors='ignore') as f:
+            with open(cfg, encoding='ascii', errors='ignore') as f:
                 self.yaml = yaml.safe_load(f)  # model dict

         # Define model
@@ -201,7 +201,7 @@ def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is
         for mi, s in zip(m.m, m.stride):  # from
             b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
             b.data[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
-            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
+            b.data[:, 5:] += math.log(0.6 / (m.nc - 0.999999)) if cf is None else torch.log(cf / cf.sum())  # cls
             mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)

     def _print_biases(self):
@@ -306,6 +306,7 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
     parser.add_argument('--cfg', type=str, default='yolov5s.yaml', help='model.yaml')
     parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
     parser.add_argument('--profile', action='store_true', help='profile model speed')
+    parser.add_argument('--test', action='store_true', help='test all yolo*.yaml')
     opt = parser.parse_args()
     opt.cfg = check_yaml(opt.cfg)  # check YAML
     print_args(FILE.stem, opt)
@@ -320,6 +321,14 @@ def parse_model(d, ch):  # model_dict, input_channels(3)
         img = torch.rand(8 if torch.cuda.is_available() else 1, 3, 640, 640).to(device)
         y = model(img, profile=True)

+    # Test all models
+    if opt.test:
+        for cfg in Path(ROOT / 'models').rglob('yolo*.yaml'):
+            try:
+                _ = Model(cfg)
+            except Exception as e:
+                print(f'Error in {cfg}: {e}')
+
     # Tensorboard (not working https://github.com/ultralytics/yolov5/issues/2898)
     # from torch.utils.tensorboard import SummaryWriter
     # tb_writer = SummaryWriter('.')
diff --git a/setup.cfg b/setup.cfg
index 7d25200cdb33..4ca0f0d7aabb 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -43,3 +43,9 @@ ignore =
     F403
     E302
     F541
+
+
+[isort]
+# https://pycqa.github.io/isort/docs/configuration/options.html
+line_length = 120
+multi_line_output = 0
diff --git a/train.py b/train.py
index 736edd036fad..698d031ad3c6 100644
--- a/train.py
+++ b/train.py
@@ -7,13 +7,13 @@
 """

 import argparse
-import logging
 import math
 import os
 import random
 import sys
 import time
 from copy import deepcopy
+from datetime import datetime
 from pathlib import Path

 import numpy as np
@@ -23,7 +23,7 @@
 import yaml
 from torch.cuda import amp
 from torch.nn.parallel import DistributedDataParallel as DDP
-from torch.optim import Adam, SGD, lr_scheduler
+from torch.optim import SGD, Adam, lr_scheduler
 from tqdm import tqdm

 FILE = Path(__file__).resolve()
@@ -37,19 +37,19 @@
 from models.yolo import Model
 from utils.autoanchor import check_anchors
 from utils.autobatch import check_train_batch_size
+from utils.callbacks import Callbacks
 from utils.datasets import create_dataloader
-from utils.general import labels_to_class_weights, increment_path, labels_to_image_weights, init_seeds, \
-    strip_optimizer, get_latest_run, check_dataset, check_git_status, check_img_size, check_requirements, \
-    check_file, check_yaml, check_suffix, print_args, print_mutation, one_cycle, colorstr, methods, LOGGER
 from utils.downloads import attempt_download
-from utils.loss import ComputeLoss
-from utils.plots import plot_labels, plot_evolve
-from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, intersect_dicts, select_device, \
-    torch_distributed_zero_first
+from utils.general import (LOGGER, check_dataset, check_file, check_git_status, check_img_size, check_requirements,
+                           check_suffix, check_yaml, colorstr, get_latest_run, increment_path, init_seeds,
+                           intersect_dicts, labels_to_class_weights, labels_to_image_weights, methods, one_cycle,
+                           print_args, print_mutation, strip_optimizer)
+from utils.loggers import Loggers
 from utils.loggers.wandb.wandb_utils import check_wandb_resume
+from utils.loss import ComputeLoss
 from utils.metrics import fitness
-from utils.loggers import Loggers
-from utils.callbacks import Callbacks
+from utils.plots import plot_evolve, plot_labels
+from utils.torch_utils import EarlyStopping, ModelEMA, de_parallel, select_device, torch_distributed_zero_first

 LOCAL_RANK = int(os.getenv('LOCAL_RANK', -1))  # https://pytorch.org/docs/stable/elastic/run.html
 RANK = int(os.getenv('RANK', -1))
@@ -105,7 +105,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
     nc = 1 if single_cls else int(data_dict['nc'])  # number of classes
     names = ['item'] if single_cls and len(data_dict['names']) != 1 else data_dict['names']  # class names
     assert len(names) == nc, f'{len(names)} names found for nc={nc} dataset in {data}'  # check
-    is_coco = data.endswith('coco.yaml') and nc == 80  # COCO dataset
+    is_coco = isinstance(val_path, str) and val_path.endswith('coco/val2017.txt')  # COCO dataset

     # Model
     check_suffix(weights, '.pt')  # check weights
@@ -200,8 +200,8 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary

     # DP mode
     if cuda and RANK == -1 and torch.cuda.device_count() > 1:
-        logging.warning('DP not recommended, instead use torch.distributed.run for best DDP Multi-GPU results.\n'
-                        'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
+        LOGGER.warning('WARNING: DP not recommended, use torch.distributed.run for best DDP Multi-GPU results.\n'
+                       'See Multi-GPU Tutorial at https://github.com/ultralytics/yolov5/issues/475 to get started.')
         model = torch.nn.DataParallel(model)

     # SyncBatchNorm
@@ -246,9 +246,9 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary

     # Model parameters
     nl = de_parallel(model).model[-1].nl  # number of detection layers (to scale hyps)
-    hyp['box'] *= 3. / nl  # scale to layers
-    hyp['cls'] *= nc / 80. * 3. / nl  # scale to classes and layers
-    hyp['obj'] *= (imgsz / 640) ** 2 * 3. / nl  # scale to image size and layers
+    hyp['box'] *= 3 / nl  # scale to layers
+    hyp['cls'] *= nc / 80 * 3 / nl  # scale to classes and layers
+    hyp['obj'] *= (imgsz / 640) ** 2 * 3 / nl  # scale to image size and layers
     hyp['label_smoothing'] = opt.label_smoothing
     model.nc = nc  # attach number of classes to model
     model.hyp = hyp  # attach hyperparameters to model
@@ -293,7 +293,7 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
         optimizer.zero_grad()
         for i, (imgs, targets, paths, _) in pbar:  # batch -------------------------------------------------------------
             ni = i + nb * epoch  # number integrated batches (since train start)
-            imgs = imgs.to(device, non_blocking=True).float() / 255.0  # uint8 to float32, 0-255 to 0.0-1.0
+            imgs = imgs.to(device, non_blocking=True).float() / 255  # uint8 to float32, 0-255 to 0.0-1.0

             # Warmup
             if ni <= nw:
@@ -380,7 +380,8 @@ def train(hyp,  # path/to/hyp.yaml or hyp dictionary
                         'ema': deepcopy(ema.ema).half(),
                         'updates': ema.updates,
                         'optimizer': optimizer.state_dict(),
-                        'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None}
+                        'wandb_id': loggers.wandb.wandb_run.id if loggers.wandb else None,
+                        'date': datetime.now().isoformat()}

                 # Save last, best and delete
                 torch.save(ckpt, last)
diff --git a/tutorial.ipynb b/tutorial.ipynb
index 9184a66d3f42..7763a26066e2 100644
--- a/tutorial.ipynb
+++ b/tutorial.ipynb
@@ -368,7 +368,7 @@
         "colab_type": "text"
       },
       "source": [
-        "\"Open"
+        "\"Open"
      ]
    },
    {
@@ -402,26 +402,24 @@
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
-        "outputId": "e2e839d5-d6fc-409c-e44c-0b0b6aa9319d"
+        "outputId": "3809e5a9-dd41-4577-fe62-5531abf7cca2"
      },
      "source": [
-        "!git clone https://github.com/ultralytics/yolov5  # clone repo\n",
+        "!git clone https://github.com/ultralytics/yolov5  # clone\n",
        "%cd yolov5\n",
-        "%pip install -qr requirements.txt  # install dependencies\n",
+        "%pip install -qr requirements.txt  # install\n",
        "\n",
-        "import torch\n",
-        "from IPython.display import Image, clear_output  # to display images\n",
-        "\n",
-        "clear_output()\n",
-        "print(f\"Setup complete. Using torch {torch.__version__} ({torch.cuda.get_device_properties(0).name if torch.cuda.is_available() else 'CPU'})\")"
+        "from yolov5 import utils\n",
+        "display = utils.notebook_init()  # checks"
      ],
-      "execution_count": 11,
+      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
-            "Setup complete. Using torch 1.10.0+cu102 (Tesla V100-SXM2-16GB)\n"
+            "YOLOv5 🚀 v6.0-48-g84a8099 torch 1.10.0+cu102 CUDA:0 (Tesla V100-SXM2-16GB, 16160MiB)\n",
+            "Setup complete ✅\n"
          ]
        }
      ]
@@ -438,11 +436,11 @@
        "\n",
        "```shell\n",
        "python detect.py --source 0  # webcam\n",
-        "                          file.jpg  # image \n",
-        "                          file.mp4  # video\n",
+        "                          img.jpg  # image \n",
+        "                          vid.mp4  # video\n",
        "                          path/  # directory\n",
        "                          path/*.jpg  # glob\n",
-        "                          'https://youtu.be/NUsoVlDFqZg'  # YouTube\n",
+        "                          'https://youtu.be/Zgi9g1ksQHc'  # YouTube\n",
        "                          'rtsp://example.com/media.mp4'  # RTSP, RTMP, HTTP stream\n",
        "```"
      ]
@@ -458,9 +456,9 @@
      },
      "source": [
        "!python detect.py --weights yolov5s.pt --img 640 --conf 0.25 --source data/images\n",
-        "Image(filename='runs/detect/exp/zidane.jpg', width=600)"
+        "display.Image(filename='runs/detect/exp/zidane.jpg', width=600)"
      ],
-      "execution_count": 17,
+      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -537,7 +535,7 @@
        "torch.hub.download_url_to_file('https://ultralytics.com/assets/coco2017val.zip', 'tmp.zip')\n",
        "!unzip -q tmp.zip -d ../datasets && rm tmp.zip"
      ],
-      "execution_count": 18,
+      "execution_count": null,
      "outputs": [
        {
          "output_type": "display_data",
@@ -568,7 +566,7 @@
        "# Run YOLOv5x on COCO val\n",
        "!python val.py --weights yolov5x.pt --data coco.yaml --img 640 --iou 0.65 --half"
      ],
-      "execution_count": 19,
+      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
@@ -726,7 +724,7 @@
        "# Train YOLOv5s on COCO128 for 3 epochs\n",
        "!python train.py --img 640 --batch 16 --epochs 3 --data coco128.yaml --weights yolov5s.pt --cache"
      ],
-      "execution_count": 24,
+      "execution_count": null,
      "outputs": [
        {
          "output_type": "stream",
diff --git a/utils/__init__.py b/utils/__init__.py
index e69de29bb2d1..2b0c896364a2 100644
--- a/utils/__init__.py
+++ b/utils/__init__.py
@@ -0,0 +1,18 @@
+# YOLOv5 🚀 by Ultralytics, GPL-3.0 license
+"""
+utils/initialization
+"""
+
+
+def notebook_init():
+    # For YOLOv5 notebooks
+    print('Checking setup...')
+    from IPython import display  # to display images and clear console output
+
+    from utils.general import emojis
+    from utils.torch_utils import select_device  # YOLOv5 imports
+
+    display.clear_output()
+    select_device(newline=False)
+    print(emojis('Setup complete ✅'))
+    return display
diff --git a/utils/activations.py b/utils/activations.py
index 62eb532b3f95..4c7d46c32104 100644
--- a/utils/activations.py
+++ b/utils/activations.py
@@ -19,7 +19,7 @@ class Hardswish(nn.Module):  # export-friendly version of nn.Hardswish()
     @staticmethod
     def forward(x):
         # return x * F.hardsigmoid(x)  # for torchscript and CoreML
-        return x * F.hardtanh(x + 3, 0., 6.) / 6.  # for torchscript, CoreML and ONNX
+        return x * F.hardtanh(x + 3, 0.0, 6.0) / 6.0  # for torchscript, CoreML and ONNX


 # Mish https://github.com/digantamisra98/Mish --------------------------------------------------------------------------
diff --git a/utils/augmentations.py b/utils/augmentations.py
index b3cbbf913b65..5dcfd49fdd05 100644
--- a/utils/augmentations.py
+++ b/utils/augmentations.py
@@ -3,14 +3,13 @@
 Image augmentation functions
 """

-import logging
 import math
 import random

 import cv2
 import numpy as np

-from utils.general import colorstr, segment2box, resample_segments, check_version
+from utils.general import LOGGER, check_version, colorstr, resample_segments, segment2box
 from utils.metrics import bbox_ioa


@@ -32,11 +31,11 @@ def __init__(self):
                 A.ImageCompression(quality_lower=75, p=0.0)],
                 bbox_params=A.BboxParams(format='yolo', label_fields=['class_labels']))

-            logging.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
+            LOGGER.info(colorstr('albumentations: ') + ', '.join(f'{x}' for x in self.transform.transforms if x.p))
         except ImportError:  # package not installed, skip
             pass
         except Exception as e:
-            logging.info(colorstr('albumentations: ') + f'{e}')
+            LOGGER.info(colorstr('albumentations: ') + f'{e}')

     def __call__(self, im, labels, p=1.0):
         if self.transform and random.random() < p:
@@ -124,7 +123,7 @@ def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleF

 def random_perspective(im, targets=(), segments=(), degrees=10, translate=.1, scale=.1, shear=10, perspective=0.0,
                        border=(0, 0)):
-    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10))
+    # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10))
     # targets = [cls, xyxy]

     height = im.shape[0] + border[0] * 2  # shape(h,w,c)
diff --git a/utils/autoanchor.py b/utils/autoanchor.py
index 6b3c661be2f7..af0aa7de65ac 100644
--- a/utils/autoanchor.py
+++ b/utils/autoanchor.py
@@ -34,10 +34,10 @@ def check_anchors(dataset, model, thr=4.0, imgsz=640):

     def metric(k):  # compute metric
         r = wh[:, None] / k[None]
-        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
+        x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
         best = x.max(1)[0]  # best_x
-        aat = (x > 1. / thr).float().sum(1).mean()  # anchors above threshold
-        bpr = (best > 1. / thr).float().mean()  # best possible recall
+        aat = (x > 1 / thr).float().sum(1).mean()  # anchors above threshold
+        bpr = (best > 1 / thr).float().mean()  # best possible recall
         return bpr, aat

     anchors = m.anchors.clone() * m.stride.to(m.anchors.device).view(-1, 1, 1)  # current anchors
@@ -80,12 +80,12 @@ def kmean_anchors(dataset='./data/coco128.yaml', n=9, img_size=640, thr=4.0, gen
     """
     from scipy.cluster.vq import kmeans

-    thr = 1. / thr
+    thr = 1 / thr
     prefix = colorstr('autoanchor: ')

     def metric(k, wh):  # compute metrics
         r = wh[:, None] / k[None]
-        x = torch.min(r, 1. / r).min(2)[0]  # ratio metric
+        x = torch.min(r, 1 / r).min(2)[0]  # ratio metric
         # x = wh_iou(wh, torch.tensor(k))  # iou metric
         return x, x.max(1)[0]  # x, best_x

diff --git a/utils/autobatch.py b/utils/autobatch.py
index 168b16f691ab..3f2b4d1a4c38 100644
--- a/utils/autobatch.py
+++ b/utils/autobatch.py
@@ -35,11 +35,12 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16):
         return batch_size

     d = str(device).upper()  # 'CUDA:0'
-    t = torch.cuda.get_device_properties(device).total_memory / 1024 ** 3  # (GB)
-    r = torch.cuda.memory_reserved(device) / 1024 ** 3  # (GB)
-    a = torch.cuda.memory_allocated(device) / 1024 ** 3  # (GB)
+    properties = torch.cuda.get_device_properties(device)  # device properties
+    t = properties.total_memory / 1024 ** 3  # (GiB)
+    r = torch.cuda.memory_reserved(device) / 1024 ** 3  # (GiB)
+    a = torch.cuda.memory_allocated(device) / 1024 ** 3  # (GiB)
     f = t - (r + a)  # free inside reserved
-    print(f'{prefix}{d} {t:.3g}G total, {r:.3g}G reserved, {a:.3g}G allocated, {f:.3g}G free')
+    print(f'{prefix}{d} ({properties.name}) {t:.2f}G total, {r:.2f}G reserved, {a:.2f}G allocated, {f:.2f}G free')

     batch_sizes = [1, 2, 4, 8, 16]
     try:
@@ -52,5 +53,5 @@ def autobatch(model, imgsz=640, fraction=0.9, batch_size=16):
     batch_sizes = batch_sizes[:len(y)]
     p = np.polyfit(batch_sizes, y, deg=1)  # first degree polynomial fit
     b = int((f * fraction - p[1]) / p[0])  # y intercept (optimal batch size)
-    print(f'{prefix}Using colorstr(batch-size {b}) for {d} {t * fraction:.3g}G/{t:.3g}G ({fraction * 100:.0f}%)')
+    print(f'{prefix}Using batch-size {b} for {d} {t * fraction:.2f}G/{t:.2f}G ({fraction * 100:.0f}%)')
     return b
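The autobatch() hunk above keeps the same estimation logic: profile memory use at a few batch sizes, fit a first-degree polynomial, and solve for the batch size that fills the target fraction of free memory. A numeric sketch with made-up measurements (the readings and free-memory figure are illustrative only):

```python
import numpy as np

batch_sizes = [1, 2, 4, 8, 16]
y = [1.2, 1.9, 3.3, 6.1, 11.8]  # hypothetical GiB consumed at each batch size
p = np.polyfit(batch_sizes, y, deg=1)  # memory ~= p[0] * batch_size + p[1]
f = 14.5  # hypothetical free GiB, i.e. total - (reserved + allocated)
fraction = 0.9  # target utilisation, the autobatch() default
b = int((f * fraction - p[1]) / p[0])  # solve p[0] * b + p[1] = f * fraction
print(b)  # 17 with these made-up numbers
```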
diff --git a/utils/datasets.py b/utils/datasets.py
index 7fce122942f7..94acaaa92cd7 100755
--- a/utils/datasets.py
+++ b/utils/datasets.py
@@ -6,13 +6,12 @@
 import glob
 import hashlib
 import json
-import logging
 import os
 import random
 import shutil
 import time
 from itertools import repeat
-from multiprocessing.pool import ThreadPool, Pool
+from multiprocessing.pool import Pool, ThreadPool
 from pathlib import Path
 from threading import Thread
 from zipfile import ZipFile
@@ -22,13 +21,13 @@
 import torch
 import torch.nn.functional as F
 import yaml
-from PIL import Image, ImageOps, ExifTags
+from PIL import ExifTags, Image, ImageOps
 from torch.utils.data import Dataset
 from tqdm import tqdm
 
 from utils.augmentations import Albumentations, augment_hsv, copy_paste, letterbox, mixup, random_perspective
-from utils.general import check_dataset, check_requirements, check_yaml, clean_str, segments2boxes, \
-    xywh2xyxy, xywhn2xyxy, xyxy2xywhn, xyn2xy, LOGGER
+from utils.general import (LOGGER, check_dataset, check_requirements, check_yaml, clean_str, segments2boxes, xyn2xy,
+                           xywh2xyxy, xywhn2xyxy, xyxy2xywhn)
 from utils.torch_utils import torch_distributed_zero_first
 
 # Parameters
@@ -335,7 +334,7 @@ def update(self, i, cap, stream):
                 if success:
                     self.imgs[i] = im
                 else:
-                    LOGGER.warn('WARNING: Video stream unresponsive, please check your IP camera connection.')
+                    LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.')
                     self.imgs[i] *= 0
                     cap.open(stream)  # re-open stream if signal was lost
             time.sleep(1 / self.fps[i])  # wait time
@@ -427,7 +426,7 @@ def __init__(self, path, img_size=640, batch_size=16, augment=False, hyp=None, r
             d = f"Scanning '{cache_path}' images and labels... {nf} found, {nm} missing, {ne} empty, {nc} corrupted"
             tqdm(None, desc=prefix + d, total=n, initial=n)  # display cache results
             if cache['msgs']:
-                logging.info('\n'.join(cache['msgs']))  # display warnings
+                LOGGER.info('\n'.join(cache['msgs']))  # display warnings
         assert nf > 0 or not augment, f'{prefix}No labels in {cache_path}. Can not train without labels. See {HELP_URL}'
 
         # Read cache
@@ -525,9 +524,9 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
         pbar.close()
         if msgs:
-            logging.info('\n'.join(msgs))
+            LOGGER.info('\n'.join(msgs))
         if nf == 0:
-            logging.info(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
+            LOGGER.warning(f'{prefix}WARNING: No labels found in {path}. See {HELP_URL}')
         x['hash'] = get_hash(self.label_files + self.img_files)
         x['results'] = nf, nm, ne, nc, len(self.img_files)
         x['msgs'] = msgs  # warnings
@@ -535,9 +534,9 @@ def cache_labels(self, path=Path('./labels.cache'), prefix=''):
         try:
             np.save(path, x)  # save cache for next time
             path.with_suffix('.cache.npy').rename(path)  # remove .npy suffix
-            logging.info(f'{prefix}New cache created: {path}')
+            LOGGER.info(f'{prefix}New cache created: {path}')
         except Exception as e:
-            logging.info(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # path not writeable
+            LOGGER.warning(f'{prefix}WARNING: Cache directory {path.parent} is not writeable: {e}')  # not writeable
         return x
 
     def __len__(self):
@@ -634,13 +633,13 @@ def collate_fn4(batch):
         n = len(shapes) // 4
         img4, label4, path4, shapes4 = [], [], path[:n], shapes[:n]
 
-        ho = torch.tensor([[0., 0, 0, 1, 0, 0]])
-        wo = torch.tensor([[0., 0, 1, 0, 0, 0]])
-        s = torch.tensor([[1, 1, .5, .5, .5, .5]])  # scale
+        ho = torch.tensor([[0.0, 0, 0, 1, 0, 0]])
+        wo = torch.tensor([[0.0, 0, 1, 0, 0, 0]])
+        s = torch.tensor([[1, 1, 0.5, 0.5, 0.5, 0.5]])  # scale
         for i in range(n):  # zidane torch.zeros(16,3,720,1280)  # BCHW
             i *= 4
             if random.random() < 0.5:
-                im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2., mode='bilinear', align_corners=False)[
+                im = F.interpolate(img[i].unsqueeze(0).float(), scale_factor=2.0, mode='bilinear', align_corners=False)[
                     0].type(img[i].type())
                 l = label[i]
             else:
diff --git a/utils/flask_rest_api/restapi.py b/utils/flask_rest_api/restapi.py
index a54e2309715c..b93ad16a0f58 100644
--- a/utils/flask_rest_api/restapi.py
+++ b/utils/flask_rest_api/restapi.py
@@ -5,8 +5,8 @@
 import io
 
 import torch
-from PIL import Image
 from flask import Flask, request
+from PIL import Image
 
 app = Flask(__name__)
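# NOTE (illustration only, not part of the patch): the logging changes above in a nutshell —
# one shared named logger instead of root `logging` calls, and Logger.warning() instead of
# the deprecated Logger.warn() alias.
import logging

logging.basicConfig(format='%(message)s', level=logging.INFO)
LOGGER = logging.getLogger('yolov5')  # configurable in one place, reused across modules
LOGGER.warning('WARNING: Video stream unresponsive, please check your IP camera connection.')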
diff --git a/utils/general.py b/utils/general.py
index d8cac8daac22..8f59d487edfb 100755
--- a/utils/general.py
+++ b/utils/general.py
@@ -45,7 +45,7 @@ def set_logging(name=None, verbose=True):
     # Sets level and returns logger
     rank = int(os.getenv('RANK', -1))  # rank in world for Multi-GPU trainings
-    logging.basicConfig(format="%(message)s", level=logging.INFO if (verbose and rank in (-1, 0)) else logging.WARN)
+    logging.basicConfig(format="%(message)s", level=logging.INFO if (verbose and rank in (-1, 0)) else logging.WARNING)
     return logging.getLogger(name)
 
 
@@ -125,6 +125,11 @@ def init_seeds(seed=0):
     cudnn.benchmark, cudnn.deterministic = (False, True) if seed == 0 else (True, False)
 
 
+def intersect_dicts(da, db, exclude=()):
+    # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
+    return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
+
+
 def get_latest_run(search_dir='.'):
     # Return path to most recent 'last.pt' in /runs (i.e. to --resume from)
     last_list = glob.glob(f'{search_dir}/**/last*.pt', recursive=True)
@@ -259,7 +264,8 @@ def check_requirements(requirements=ROOT / 'requirements.txt', exclude=(), insta
     if isinstance(requirements, (str, Path)):  # requirements.txt file
         file = Path(requirements)
         assert file.exists(), f"{prefix} {file.resolve()} not found, check failed."
-        requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(file.open()) if x.name not in exclude]
+        with file.open() as f:
+            requirements = [f'{x.name}{x.specifier}' for x in pkg.parse_requirements(f) if x.name not in exclude]
     else:  # list or tuple of packages
         requirements = [x for x in requirements if x not in exclude]
 
@@ -338,9 +344,12 @@ def check_file(file, suffix=''):
     elif file.startswith(('http:/', 'https:/')):  # download
         url = str(Path(file)).replace(':/', '://')  # Pathlib turns :// -> :/
         file = Path(urllib.parse.unquote(file).split('?')[0]).name  # '%2F' to '/', split https://url.com/file.txt?auth
-        print(f'Downloading {url} to {file}...')
-        torch.hub.download_url_to_file(url, file)
-        assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}'  # check
+        if Path(file).is_file():
+            print(f'Found {url} locally at {file}')  # file already exists
+        else:
+            print(f'Downloading {url} to {file}...')
+            torch.hub.download_url_to_file(url, file)
+            assert Path(file).exists() and Path(file).stat().st_size > 0, f'File download failed: {url}'  # check
         return file
     else:  # search
         files = []
@@ -777,7 +786,8 @@ def print_mutation(results, hyp, save_dir, bucket):
 
 
 def apply_classifier(x, model, img, im0):
-    # Apply a second stage classifier to yolo outputs
+    # Apply a second stage classifier to YOLO outputs
+    # Example model = torchvision.models.__dict__['efficientnet_b0'](pretrained=True).to(device).eval()
     im0 = [im0] if isinstance(im0, np.ndarray) else im0
     for i, d in enumerate(x):  # per image
         if d is not None and len(d):
@@ -802,7 +812,7 @@ def apply_classifier(x, model, img, im0):
 
             im = im[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
             im = np.ascontiguousarray(im, dtype=np.float32)  # uint8 to float32
-            im /= 255.0  # 0 - 255 to 0.0 - 1.0
+            im /= 255  # 0 - 255 to 0.0 - 1.0
             ims.append(im)
 
         pred_cls2 = model(torch.Tensor(ims).to(d.device)).argmax(1)  # classifier prediction
@@ -811,33 +821,16 @@ def apply_classifier(x, model, img, im0):
     return x
 
 
-def save_one_box(xyxy, im, file='image.jpg', gain=1.02, pad=10, square=False, BGR=False, save=True):
-    # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
-    xyxy = torch.tensor(xyxy).view(-1, 4)
-    b = xyxy2xywh(xyxy)  # boxes
-    if square:
-        b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
-    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
-    xyxy = xywh2xyxy(b).long()
-    clip_coords(xyxy, im.shape)
-    crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
-    if save:
-        cv2.imwrite(str(increment_path(file, mkdir=True).with_suffix('.jpg')), crop)
-    return crop
-
-
 def increment_path(path, exist_ok=False, sep='', mkdir=False):
     # Increment file or directory path, i.e. runs/exp --> runs/exp{sep}2, runs/exp{sep}3, ... etc.
     path = Path(path)  # os-agnostic
     if path.exists() and not exist_ok:
-        suffix = path.suffix
-        path = path.with_suffix('')
+        path, suffix = (path.with_suffix(''), path.suffix) if path.is_file() else (path, '')
         dirs = glob.glob(f"{path}{sep}*")  # similar paths
         matches = [re.search(rf"%s{sep}(\d+)" % path.stem, d) for d in dirs]
         i = [int(m.groups()[0]) for m in matches if m]  # indices
         n = max(i) + 1 if i else 2  # increment number
-        path = Path(f"{path}{sep}{n}{suffix}")  # update path
-    dir = path if path.suffix == '' else path.parent  # directory
-    if not dir.exists() and mkdir:
-        dir.mkdir(parents=True, exist_ok=True)  # make directory
+        path = Path(f"{path}{sep}{n}{suffix}")  # increment path
+    if mkdir:
+        path.mkdir(parents=True, exist_ok=True)  # make directory
     return path
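# NOTE (illustration only, not part of the patch): a runnable sketch of the new
# intersect_dicts() above; the toy state dicts here are invented. In practice it supports
# partial checkpoint loading, e.g. model.load_state_dict(intersect_dicts(csd, sd), strict=False).
import torch
from utils.general import intersect_dicts

da = {'conv.weight': torch.zeros(3, 3), 'anchors': torch.zeros(2)}
db = {'conv.weight': torch.ones(3, 3), 'anchors': torch.ones(3)}
print(intersect_dicts(da, db, exclude=['anchors']))  # keeps only 'conv.weight', with da's values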
diff --git a/utils/loggers/wandb/sweep.py b/utils/loggers/wandb/sweep.py
index 6029f6b8039d..206059bc30bf 100644
--- a/utils/loggers/wandb/sweep.py
+++ b/utils/loggers/wandb/sweep.py
@@ -8,10 +8,10 @@
 if str(ROOT) not in sys.path:
     sys.path.append(str(ROOT))  # add ROOT to PATH
 
-from train import train, parse_opt
+from train import parse_opt, train
+from utils.callbacks import Callbacks
 from utils.general import increment_path
 from utils.torch_utils import select_device
-from utils.callbacks import Callbacks
 
 
 def sweep():
diff --git a/utils/loggers/wandb/wandb_utils.py b/utils/loggers/wandb/wandb_utils.py
index 8546ec6c63cb..a71bc6ce96d2 100644
--- a/utils/loggers/wandb/wandb_utils.py
+++ b/utils/loggers/wandb/wandb_utils.py
@@ -16,8 +16,7 @@
 if str(ROOT) not in sys.path:
     sys.path.append(str(ROOT))  # add ROOT to PATH
 
-from utils.datasets import LoadImagesAndLabels
-from utils.datasets import img2label_paths
+from utils.datasets import LoadImagesAndLabels, img2label_paths
 from utils.general import check_dataset, check_file
 
 try:
diff --git a/utils/loss.py b/utils/loss.py
index e8ce42ad994a..194c8e503e0e 100644
--- a/utils/loss.py
+++ b/utils/loss.py
@@ -108,7 +108,7 @@ def __init__(self, model, autobalance=False):
             BCEcls, BCEobj = FocalLoss(BCEcls, g), FocalLoss(BCEobj, g)
 
         det = model.module.model[-1] if is_parallel(model) else model.model[-1]  # Detect() module
-        self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, .02])  # P3-P7
+        self.balance = {3: [4.0, 1.0, 0.4]}.get(det.nl, [4.0, 1.0, 0.25, 0.06, 0.02])  # P3-P7
         self.ssi = list(det.stride).index(16) if autobalance else 0  # stride 16 index
         self.BCEcls, self.BCEobj, self.gr, self.hyp, self.autobalance = BCEcls, BCEobj, 1.0, h, autobalance
         for k in 'na', 'nc', 'nl', 'anchors':
@@ -129,7 +129,7 @@ def __call__(self, p, targets):  # predictions, targets, model
                 ps = pi[b, a, gj, gi]  # prediction subset corresponding to targets
 
                 # Regression
-                pxy = ps[:, :2].sigmoid() * 2. - 0.5
+                pxy = ps[:, :2].sigmoid() * 2 - 0.5
                 pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i]
                 pbox = torch.cat((pxy, pwh), 1)  # predicted box
                 iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True)  # iou(prediction, target)
@@ -189,15 +189,15 @@ def build_targets(self, p, targets):
             if nt:
                 # Matches
                 r = t[:, :, 4:6] / anchors[:, None]  # wh ratio
-                j = torch.max(r, 1. / r).max(2)[0] < self.hyp['anchor_t']  # compare
+                j = torch.max(r, 1 / r).max(2)[0] < self.hyp['anchor_t']  # compare
                 # j = wh_iou(anchors, t[:, 4:6]) > model.hyp['iou_t']  # iou(3,n)=wh_iou(anchors(3,2), gwh(n,2))
                 t = t[j]  # filter
 
                 # Offsets
                 gxy = t[:, 2:4]  # grid xy
                 gxi = gain[[2, 3]] - gxy  # inverse
-                j, k = ((gxy % 1. < g) & (gxy > 1.)).T
-                l, m = ((gxi % 1. < g) & (gxi > 1.)).T
+                j, k = ((gxy % 1 < g) & (gxy > 1)).T
+                l, m = ((gxi % 1 < g) & (gxi > 1)).T
                 j = torch.stack((torch.ones_like(j), j, k, l, m))
                 t = t.repeat((5, 1, 1))[j]
                 offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j]
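# NOTE (illustration only, not part of the patch): the box decode touched above, shown on
# its own. The sigmoid outputs are rescaled so xy stays within (-0.5, 1.5) of the grid cell
# and wh within (0, 4) times the anchor.
import torch

t = torch.tensor([-4.0, 0.0, 4.0])   # raw network outputs
pxy = t.sigmoid() * 2 - 0.5          # -> tensor([-0.4641,  0.5000,  1.4641])
pwh = (t.sigmoid() * 2) ** 2         # -> tensor([0.0013, 1.0000, 3.8574]), anchor multiplier
print(pxy, pwh)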
diff --git a/utils/plots.py b/utils/plots.py
index 00cda6d8d986..b5e25d668d22 100644
--- a/utils/plots.py
+++ b/utils/plots.py
@@ -17,7 +17,7 @@
 import torch
 from PIL import Image, ImageDraw, ImageFont
 
-from utils.general import user_config_dir, is_ascii, is_chinese, xywh2xyxy, xyxy2xywh
+from utils.general import clip_coords, increment_path, is_ascii, is_chinese, user_config_dir, xywh2xyxy, xyxy2xywh
 from utils.metrics import fitness
 
 # Settings
@@ -117,6 +117,33 @@ def result(self):
         return np.asarray(self.im)
 
 
+def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
+    """
+    x: Features to be visualized
+    module_type: Module type
+    stage: Module stage within model
+    n: Maximum number of feature maps to plot
+    save_dir: Directory to save results
+    """
+    if 'Detect' not in module_type:
+        batch, channels, height, width = x.shape  # batch, channels, height, width
+        if height > 1 and width > 1:
+            f = f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
+
+            blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
+            n = min(n, channels)  # number of plots
+            fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # n/8 rows x 8 cols
+            ax = ax.ravel()
+            plt.subplots_adjust(wspace=0.05, hspace=0.05)
+            for i in range(n):
+                ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
+                ax[i].axis('off')
+
+            print(f'Saving {save_dir / f}... ({n}/{channels})')
+            plt.savefig(save_dir / f, dpi=300, bbox_inches='tight')
+            plt.close()
+
+
 def hist2d(x, y, n=100):
     # 2d histogram used in labels.png and evolve.png
     xedges, yedges = np.linspace(x.min(), x.max(), n), np.linspace(y.min(), y.max(), n)
@@ -155,7 +182,7 @@ def plot_images(images, targets, paths=None, fname='images.jpg', names=None, max
     if isinstance(targets, torch.Tensor):
         targets = targets.cpu().numpy()
     if np.max(images[0]) <= 1:
-        images *= 255.0  # de-normalise (optional)
+        images *= 255  # de-normalise (optional)
     bs, _, h, w = images.shape  # batch size, _, height, width
     bs = min(bs, max_subplots)  # limit plot images
     ns = np.ceil(bs ** 0.5)  # number of subplots (square)
@@ -337,37 +364,6 @@ def plot_labels(labels, names=(), save_dir=Path('')):
     plt.close()
 
 
-def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
-    # Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection()
-    ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel()
-    s = ['Images', 'Free Storage (GB)', 'RAM Usage (GB)', 'Battery', 'dt_raw (ms)', 'dt_smooth (ms)', 'real-world FPS']
-    files = list(Path(save_dir).glob('frames*.txt'))
-    for fi, f in enumerate(files):
-        try:
-            results = np.loadtxt(f, ndmin=2).T[:, 90:-30]  # clip first and last rows
-            n = results.shape[1]  # number of rows
-            x = np.arange(start, min(stop, n) if stop else n)
-            results = results[:, x]
-            t = (results[0] - results[0].min())  # set t0=0s
-            results[0] = x
-            for i, a in enumerate(ax):
-                if i < len(results):
-                    label = labels[fi] if len(labels) else f.stem.replace('frames_', '')
-                    a.plot(t, results[i], marker='.', label=label, linewidth=1, markersize=5)
-                    a.set_title(s[i])
-                    a.set_xlabel('time (s)')
-                    # if fi == len(files) - 1:
-                    #     a.set_ylim(bottom=0)
-                    for side in ['top', 'right']:
-                        a.spines[side].set_visible(False)
-                else:
-                    a.remove()
-        except Exception as e:
-            print(f'Warning: Plotting error for {f}; {e}')
-    ax[1].legend()
-    plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
-
-
 def plot_evolve(evolve_csv='path/to/evolve.csv'):  # from utils.plots import *; plot_evolve()
     # Plot evolve.csv hyp evolution results
     evolve_csv = Path(evolve_csv)
@@ -420,28 +416,48 @@ def plot_results(file='path/to/results.csv', dir=''):
     plt.close()
 
 
-def feature_visualization(x, module_type, stage, n=32, save_dir=Path('runs/detect/exp')):
-    """
-    x: Features to be visualized
-    module_type: Module type
-    stage: Module stage within model
-    n: Maximum number of feature maps to plot
-    save_dir: Directory to save results
-    """
-    if 'Detect' not in module_type:
-        batch, channels, height, width = x.shape  # batch, channels, height, width
-        if height > 1 and width > 1:
-            f = f"stage{stage}_{module_type.split('.')[-1]}_features.png"  # filename
+def profile_idetection(start=0, stop=0, labels=(), save_dir=''):
+    # Plot iDetection '*.txt' per-image logs. from utils.plots import *; profile_idetection()
+    ax = plt.subplots(2, 4, figsize=(12, 6), tight_layout=True)[1].ravel()
+    s = ['Images', 'Free Storage (GB)', 'RAM Usage (GB)', 'Battery', 'dt_raw (ms)', 'dt_smooth (ms)', 'real-world FPS']
+    files = list(Path(save_dir).glob('frames*.txt'))
+    for fi, f in enumerate(files):
+        try:
+            results = np.loadtxt(f, ndmin=2).T[:, 90:-30]  # clip first and last rows
+            n = results.shape[1]  # number of rows
+            x = np.arange(start, min(stop, n) if stop else n)
+            results = results[:, x]
+            t = (results[0] - results[0].min())  # set t0=0s
+            results[0] = x
+            for i, a in enumerate(ax):
+                if i < len(results):
+                    label = labels[fi] if len(labels) else f.stem.replace('frames_', '')
+                    a.plot(t, results[i], marker='.', label=label, linewidth=1, markersize=5)
+                    a.set_title(s[i])
+                    a.set_xlabel('time (s)')
+                    # if fi == len(files) - 1:
+                    #     a.set_ylim(bottom=0)
+                    for side in ['top', 'right']:
+                        a.spines[side].set_visible(False)
+                else:
+                    a.remove()
+        except Exception as e:
+            print(f'Warning: Plotting error for {f}; {e}')
+    ax[1].legend()
+    plt.savefig(Path(save_dir) / 'idetection_profile.png', dpi=200)
 
-            blocks = torch.chunk(x[0].cpu(), channels, dim=0)  # select batch index 0, block by channels
-            n = min(n, channels)  # number of plots
-            fig, ax = plt.subplots(math.ceil(n / 8), 8, tight_layout=True)  # 8 rows x n/8 cols
-            ax = ax.ravel()
-            plt.subplots_adjust(wspace=0.05, hspace=0.05)
-            for i in range(n):
-                ax[i].imshow(blocks[i].squeeze())  # cmap='gray'
-                ax[i].axis('off')
 
-            print(f'Saving {save_dir / f}... ({n}/{channels})')
-            plt.savefig(save_dir / f, dpi=300, bbox_inches='tight')
-            plt.close()
+def save_one_box(xyxy, im, file=Path('image.jpg'), gain=1.02, pad=10, square=False, BGR=False, save=True):
+    # Save image crop as {file} with crop size multiple {gain} and {pad} pixels. Save and/or return crop
+    xyxy = torch.tensor(xyxy).view(-1, 4)
+    b = xyxy2xywh(xyxy)  # boxes
+    if square:
+        b[:, 2:] = b[:, 2:].max(1)[0].unsqueeze(1)  # attempt rectangle to square
+    b[:, 2:] = b[:, 2:] * gain + pad  # box wh * gain + pad
+    xyxy = xywh2xyxy(b).long()
+    clip_coords(xyxy, im.shape)
+    crop = im[int(xyxy[0, 1]):int(xyxy[0, 3]), int(xyxy[0, 0]):int(xyxy[0, 2]), ::(1 if BGR else -1)]
+    if save:
+        file.parent.mkdir(parents=True, exist_ok=True)  # make directory
+        cv2.imwrite(str(increment_path(file).with_suffix('.jpg')), crop)
+    return crop
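# NOTE (illustration only, not part of the patch): feature_visualization() above is normally
# called per layer from the model forward pass; a direct call with a fake activation:
import torch
from pathlib import Path
from utils.plots import feature_visualization

x = torch.rand(1, 64, 80, 80)  # hypothetical P3-level feature map, BCHW
feature_visualization(x, 'models.common.C3', stage=2, n=32, save_dir=Path('.'))
# -> saves ./stage2_C3_features.png with the first 32 channel maps of batch image 0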
diff --git a/utils/torch_utils.py b/utils/torch_utils.py
index 6e619d9c6955..16289104eb48 100644
--- a/utils/torch_utils.py
+++ b/utils/torch_utils.py
@@ -4,7 +4,6 @@
 """
 
 import datetime
-import logging
 import math
 import os
 import platform
@@ -18,7 +17,6 @@
 import torch.distributed as dist
 import torch.nn as nn
 import torch.nn.functional as F
-import torchvision
 
 from utils.general import LOGGER
 
@@ -55,7 +53,7 @@ def git_describe(path=Path(__file__).parent):  # path must be a directory
         return ''  # not a git repository
 
 
-def select_device(device='', batch_size=None):
+def select_device(device='', batch_size=None, newline=True):
     # device = 'cpu' or '0' or '0,1,2,3'
     s = f'YOLOv5 🚀 {git_describe() or date_modified()} torch {torch.__version__} '  # string
     device = str(device).strip().lower().replace('cuda:', '')  # to string, 'cuda:0' to '0'
@@ -79,6 +77,8 @@ def select_device(device='', batch_size=None):
     else:
         s += 'CPU\n'
 
+    if not newline:
+        s = s.rstrip()
     LOGGER.info(s.encode().decode('ascii', 'ignore') if platform.system() == 'Windows' else s)  # emoji-safe
     return torch.device('cuda:0' if cuda else 'cpu')
 
@@ -100,7 +100,6 @@ def profile(input, ops, n=10, device=None):
     #     profile(input, [m1, m2], n=100)  # profile over 100 iterations
 
     results = []
-    logging.basicConfig(format="%(message)s", level=logging.INFO)
     device = device or select_device()
     print(f"{'Params':>12s}{'GFLOPs':>12s}{'GPU_mem (GB)':>14s}{'forward (ms)':>14s}{'backward (ms)':>14s}"
           f"{'input':>24s}{'output':>24s}")
@@ -111,7 +110,7 @@ def profile(input, ops, n=10, device=None):
     for m in ops if isinstance(ops, list) else [ops]:
         m = m.to(device) if hasattr(m, 'to') else m  # device
         m = m.half() if hasattr(m, 'half') and isinstance(x, torch.Tensor) and x.dtype is torch.float16 else m
-        tf, tb, t = 0., 0., [0., 0., 0.]  # dt forward, backward
+        tf, tb, t = 0, 0, [0, 0, 0]  # dt forward, backward
         try:
             flops = thop.profile(m, inputs=(x,), verbose=False)[0] / 1E9 * 2  # GFLOPs
         except:
@@ -153,11 +152,6 @@ def de_parallel(model):
     return model.module if is_parallel(model) else model
 
 
-def intersect_dicts(da, db, exclude=()):
-    # Dictionary intersection of matching keys and shapes, omitting 'exclude' keys, using da values
-    return {k: v for k, v in da.items() if k in db and not any(x in k for x in exclude) and v.shape == db[k].shape}
-
-
 def initialize_weights(model):
     for m in model.modules():
         t = type(m)
@@ -177,7 +171,7 @@ def find_modules(model, mclass=nn.Conv2d):
 
 def sparsity(model):
     # Return global model sparsity
-    a, b = 0., 0.
+    a, b = 0, 0
     for p in model.parameters():
         a += p.numel()
         b += (p == 0).sum()
@@ -242,25 +236,6 @@ def model_info(model, verbose=False, img_size=640):
     LOGGER.info(f"Model Summary: {len(list(model.modules()))} layers, {n_p} parameters, {n_g} gradients{fs}")
 
 
-def load_classifier(name='resnet101', n=2):
-    # Loads a pretrained model reshaped to n-class output
-    model = torchvision.models.__dict__[name](pretrained=True)
-
-    # ResNet model properties
-    # input_size = [3, 224, 224]
-    # input_space = 'RGB'
-    # input_range = [0, 1]
-    # mean = [0.485, 0.456, 0.406]
-    # std = [0.229, 0.224, 0.225]
-
-    # Reshape output to n classes
-    filters = model.fc.weight.shape[1]
-    model.fc.bias = nn.Parameter(torch.zeros(n), requires_grad=True)
-    model.fc.weight = nn.Parameter(torch.zeros(n, filters), requires_grad=True)
-    model.fc.out_features = n
-    return model
-
-
 def scale_img(img, ratio=1.0, same_shape=False, gs=32):  # img(16,3,256,416)
     # scales img(bs,3,y,x) by ratio constrained to gs-multiple
     if ratio == 1.0:
@@ -336,7 +311,7 @@ def update(self, model):
             for k, v in self.ema.state_dict().items():
                 if v.dtype.is_floating_point:
                     v *= d
-                    v += (1. - d) * msd[k].detach()
+                    v += (1 - d) * msd[k].detach()
 
     def update_attr(self, model, include=(), exclude=('process_group', 'reducer')):
         # Update EMA attributes
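# NOTE (illustration only, not part of the patch): the EMA update touched above, written out
# for a single scalar parameter.
import torch

d = 0.9999                 # decay
v = torch.tensor(1.0)      # EMA shadow value
msd_k = torch.tensor(0.0)  # current model value for the same key
v *= d
v += (1 - d) * msd_k       # i.e. v = d * v + (1 - d) * msd_k
print(v)                   # tensor(0.9999)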
diff --git a/val.py b/val.py
index 4aab87e275d3..7f23b8704de5 100644
--- a/val.py
+++ b/val.py
@@ -23,15 +23,15 @@
     sys.path.append(str(ROOT))  # add ROOT to PATH
 ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative
 
-from models.experimental import attempt_load
+from models.common import DetectMultiBackend
+from utils.callbacks import Callbacks
 from utils.datasets import create_dataloader
-from utils.general import box_iou, coco80_to_coco91_class, colorstr, check_dataset, check_img_size, \
-    check_requirements, check_suffix, check_yaml, increment_path, non_max_suppression, print_args, scale_coords, \
-    xyxy2xywh, xywh2xyxy, LOGGER
-from utils.metrics import ap_per_class, ConfusionMatrix
+from utils.general import (LOGGER, box_iou, check_dataset, check_img_size, check_requirements, check_yaml,
+                           coco80_to_coco91_class, colorstr, increment_path, non_max_suppression, print_args,
+                           scale_coords, xywh2xyxy, xyxy2xywh)
+from utils.metrics import ConfusionMatrix, ap_per_class
 from utils.plots import output_to_target, plot_images, plot_val_study
 from utils.torch_utils import select_device, time_sync
-from utils.callbacks import Callbacks
 
 
 def save_one_txt(predn, save_conf, shape, file):
@@ -100,6 +100,7 @@ def run(data,
         name='exp',  # save to project/name
         exist_ok=False,  # existing project/name ok, do not increment
         half=True,  # use FP16 half-precision inference
+        dnn=False,  # use OpenCV DNN for ONNX inference
         model=None,
         dataloader=None,
         save_dir=Path(''),
@@ -110,8 +111,10 @@ def run(data,
     # Initialize/load model and set device
     training = model is not None
     if training:  # called by train.py
-        device = next(model.parameters()).device  # get model device
+        device, pt = next(model.parameters()).device, True  # get model device, PyTorch model
 
+        half &= device.type != 'cpu'  # half precision only supported on CUDA
+        model.half() if half else model.float()
     else:  # called directly
         device = select_device(device, batch_size=batch_size)
@@ -120,22 +123,21 @@ def run(data,
         (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
 
         # Load model
-        check_suffix(weights, '.pt')
-        model = attempt_load(weights, map_location=device)  # load FP32 model
-        gs = max(int(model.stride.max()), 32)  # grid size (max stride)
-        imgsz = check_img_size(imgsz, s=gs)  # check image size
-
-        # Multi-GPU disabled, incompatible with .half() https://github.com/ultralytics/yolov5/issues/99
-        # if device.type != 'cpu' and torch.cuda.device_count() > 1:
-        #     model = nn.DataParallel(model)
+        model = DetectMultiBackend(weights, device=device, dnn=dnn)
+        stride, pt = model.stride, model.pt
+        imgsz = check_img_size(imgsz, s=stride)  # check image size
+        half &= pt and device.type != 'cpu'  # half precision only supported by PyTorch on CUDA
+        if pt:
+            model.model.half() if half else model.model.float()
+        else:
+            half = False
+            batch_size = 1  # export.py models default to batch-size 1
+            device = torch.device('cpu')
+            LOGGER.info(f'Forcing --batch-size 1 square inference shape(1,3,{imgsz},{imgsz}) for non-PyTorch backends')
 
         # Data
         data = check_dataset(data)  # check
 
-        # Half
-        half &= device.type != 'cpu'  # half precision only supported on CUDA
-        model.half() if half else model.float()
-
     # Configure
     model.eval()
     is_coco = isinstance(data.get('val'), str) and data['val'].endswith('coco/val2017.txt')  # COCO dataset
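# NOTE (illustration only, not part of the patch): the new loading path above behind one
# call; 'yolov5s.pt' is assumed to be present locally.
from models.common import DetectMultiBackend
from utils.torch_utils import select_device

device = select_device('')
model = DetectMultiBackend('yolov5s.pt', device=device, dnn=False)
print(model.stride, model.pt)  # grid stride, and whether this is a native PyTorch model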
@@ -145,11 +147,11 @@ def run(data,
     # Dataloader
     if not training:
-        if device.type != 'cpu':
-            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.parameters())))  # run once
+        if pt and device.type != 'cpu':
+            model(torch.zeros(1, 3, imgsz, imgsz).to(device).type_as(next(model.model.parameters())))  # warmup
         pad = 0.0 if task == 'speed' else 0.5
         task = task if task in ('train', 'val', 'test') else 'val'  # path to train/val/test images
-        dataloader = create_dataloader(data[task], imgsz, batch_size, gs, single_cls, pad=pad, rect=True,
+        dataloader = create_dataloader(data[task], imgsz, batch_size, stride, single_cls, pad=pad, rect=pt,
                                        prefix=colorstr(f'{task}: '))[0]
 
     seen = 0
@@ -161,32 +163,33 @@ def run(data,
     loss = torch.zeros(3, device=device)
     jdict, stats, ap, ap_class = [], [], [], []
     pbar = tqdm(dataloader, desc=s, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}')  # progress bar
-    for batch_i, (img, targets, paths, shapes) in enumerate(pbar):
+    for batch_i, (im, targets, paths, shapes) in enumerate(pbar):
         t1 = time_sync()
-        img = img.to(device, non_blocking=True)
-        img = img.half() if half else img.float()  # uint8 to fp16/32
-        img /= 255.0  # 0 - 255 to 0.0 - 1.0
-        targets = targets.to(device)
-        nb, _, height, width = img.shape  # batch size, channels, height, width
+        if pt:
+            im = im.to(device, non_blocking=True)
+            targets = targets.to(device)
+        im = im.half() if half else im.float()  # uint8 to fp16/32
+        im /= 255  # 0 - 255 to 0.0 - 1.0
+        nb, _, height, width = im.shape  # batch size, channels, height, width
         t2 = time_sync()
         dt[0] += t2 - t1
 
-        # Run model
-        out, train_out = model(img, augment=augment)  # inference and training outputs
+        # Inference
+        out, train_out = model(im) if training else model(im, augment=augment, val=True)  # inference, loss outputs
         dt[1] += time_sync() - t2
 
-        # Compute loss
+        # Loss
         if compute_loss:
             loss += compute_loss([x.float() for x in train_out], targets)[1]  # box, obj, cls
 
-        # Run NMS
+        # NMS
         targets[:, 2:] *= torch.Tensor([width, height, width, height]).to(device)  # to pixels
         lb = [targets[targets[:, 0] == i, 1:] for i in range(nb)] if save_hybrid else []  # for autolabelling
         t3 = time_sync()
         out = non_max_suppression(out, conf_thres, iou_thres, labels=lb, multi_label=True, agnostic=single_cls)
         dt[2] += time_sync() - t3
 
-        # Statistics per image
+        # Metrics
         for si, pred in enumerate(out):
             labels = targets[targets[:, 0] == si, 1:]
             nl = len(labels)
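# NOTE (illustration only, not part of the patch): the per-batch preprocessing above, run on
# a fake uint8 batch.
import torch

im = torch.randint(0, 256, (16, 3, 640, 640), dtype=torch.uint8)  # fake BCHW uint8 batch
half = False
im = im.half() if half else im.float()    # uint8 to fp16/32
im /= 255                                 # 0 - 255 to 0.0 - 1.0
print(im.min().item(), im.max().item())   # values now within [0.0, 1.0]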
@@ -203,12 +206,12 @@ def run(data,
             if single_cls:
                 pred[:, 5] = 0
             predn = pred.clone()
-            scale_coords(img[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
+            scale_coords(im[si].shape[1:], predn[:, :4], shape, shapes[si][1])  # native-space pred
 
             # Evaluate
             if nl:
                 tbox = xywh2xyxy(labels[:, 1:5])  # target boxes
-                scale_coords(img[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
+                scale_coords(im[si].shape[1:], tbox, shape, shapes[si][1])  # native-space labels
                 labelsn = torch.cat((labels[:, 0:1], tbox), 1)  # native-space labels
                 correct = process_batch(predn, labelsn, iouv)
                 if plots:
@@ -222,16 +225,16 @@ def run(data,
                     save_one_txt(predn, save_conf, shape, file=save_dir / 'labels' / (path.stem + '.txt'))
                 if save_json:
                     save_one_json(predn, jdict, path, class_map)  # append to COCO-JSON dictionary
-            callbacks.run('on_val_image_end', pred, predn, path, names, img[si])
+            callbacks.run('on_val_image_end', pred, predn, path, names, im[si])
 
         # Plot images
         if plots and batch_i < 3:
             f = save_dir / f'val_batch{batch_i}_labels.jpg'  # labels
-            Thread(target=plot_images, args=(img, targets, paths, f, names), daemon=True).start()
+            Thread(target=plot_images, args=(im, targets, paths, f, names), daemon=True).start()
             f = save_dir / f'val_batch{batch_i}_pred.jpg'  # predictions
-            Thread(target=plot_images, args=(img, output_to_target(out), paths, f, names), daemon=True).start()
+            Thread(target=plot_images, args=(im, output_to_target(out), paths, f, names), daemon=True).start()
 
-    # Compute statistics
+    # Compute metrics
     stats = [np.concatenate(x, 0) for x in zip(*stats)]  # to numpy
     if len(stats) and stats[0].any():
         p, r, ap, f1, ap_class = ap_per_class(*stats, plot=plots, save_dir=save_dir, names=names)
@@ -319,6 +322,7 @@ def parse_opt():
     parser.add_argument('--name', default='exp', help='save to project/name')
     parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
     parser.add_argument('--half', action='store_true', help='use FP16 half-precision inference')
+    parser.add_argument('--dnn', action='store_true', help='use OpenCV DNN for ONNX inference')
     opt = parser.parse_args()
     opt.data = check_yaml(opt.data)  # check YAML
     opt.save_json |= opt.data.endswith('coco.yaml')
@@ -331,6 +335,8 @@ def main(opt):
     check_requirements(requirements=ROOT / 'requirements.txt', exclude=('tensorboard', 'thop'))
 
     if opt.task in ('train', 'val', 'test'):  # run normally
+        if opt.conf_thres > 0.001:  # https://github.com/ultralytics/yolov5/issues/1466
+            LOGGER.info(f'WARNING: confidence threshold {opt.conf_thres} >> 0.001 will produce invalid mAP values.')
         run(**vars(opt))
 
     elif opt.task == 'speed':  # speed benchmarks