From 2211ef5b3aebe5dc497745778e7f080f27dd459d Mon Sep 17 00:00:00 2001
From: San <99511815+sanowl@users.noreply.github.com>
Date: Fri, 21 Jun 2024 07:40:35 +0300
Subject: [PATCH 1/5] Refactor YOLOv5 code for readability, maintainability,
 and efficiency

---
 models/yolo.py | 34 ++++------------------------------
 1 file changed, 4 insertions(+), 30 deletions(-)

diff --git a/models/yolo.py b/models/yolo.py
index d89c5da018de..fa898788f67d 100644
--- a/models/yolo.py
+++ b/models/yolo.py
@@ -26,41 +26,15 @@
 ROOT = Path(os.path.relpath(ROOT, Path.cwd()))  # relative

 from models.common import (
-    C3,
-    C3SPP,
-    C3TR,
-    SPP,
-    SPPF,
-    Bottleneck,
-    BottleneckCSP,
-    C3Ghost,
-    C3x,
-    Classify,
-    Concat,
-    Contract,
-    Conv,
-    CrossConv,
-    DetectMultiBackend,
-    DWConv,
-    DWConvTranspose2d,
-    Expand,
-    Focus,
-    GhostBottleneck,
-    GhostConv,
-    Proto,
+    C3, C3SPP, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C3Ghost, C3x, Classify, Concat, Contract, Conv,
+    CrossConv, DetectMultiBackend, DWConv, DWConvTranspose2d, Expand, Focus, GhostBottleneck, GhostConv, Proto,
 )
 from models.experimental import MixConv2d
 from utils.autoanchor import check_anchor_order
 from utils.general import LOGGER, check_version, check_yaml, colorstr, make_divisible, print_args
 from utils.plots import feature_visualization
 from utils.torch_utils import (
-    fuse_conv_and_bn,
-    initialize_weights,
-    model_info,
-    profile,
-    scale_img,
-    select_device,
-    time_sync,
+    fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device, time_sync,
 )

 try:
@@ -341,7 +315,7 @@ def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None):
 class ClassificationModel(BaseModel):
     # YOLOv5 classification model
     def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):
-        """Initializes YOLOv5 model with config file `cfg`, input channels `ch`, number of classes `nc`, and `cuttoff`
+        """Initializes YOLOv5 model with config file `cfg`, input channels `ch`, number of classes `nc`, and `cutoff`
         index.
""" super().__init__() From 112c6a9641a40bd0515a0defc2b3132fbf148613 Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Fri, 21 Jun 2024 04:41:58 +0000 Subject: [PATCH 2/5] Auto-format by https://ultralytics.com/actions --- models/yolo.py | 32 +++++++++++++++++++++++++++++--- 1 file changed, 29 insertions(+), 3 deletions(-) diff --git a/models/yolo.py b/models/yolo.py index fa898788f67d..806a63ee0a41 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -26,15 +26,41 @@ ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative from models.common import ( - C3, C3SPP, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C3Ghost, C3x, Classify, Concat, Contract, Conv, - CrossConv, DetectMultiBackend, DWConv, DWConvTranspose2d, Expand, Focus, GhostBottleneck, GhostConv, Proto, + C3, + C3SPP, + C3TR, + SPP, + SPPF, + Bottleneck, + BottleneckCSP, + C3Ghost, + C3x, + Classify, + Concat, + Contract, + Conv, + CrossConv, + DetectMultiBackend, + DWConv, + DWConvTranspose2d, + Expand, + Focus, + GhostBottleneck, + GhostConv, + Proto, ) from models.experimental import MixConv2d from utils.autoanchor import check_anchor_order from utils.general import LOGGER, check_version, check_yaml, colorstr, make_divisible, print_args from utils.plots import feature_visualization from utils.torch_utils import ( - fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device, time_sync, + fuse_conv_and_bn, + initialize_weights, + model_info, + profile, + scale_img, + select_device, + time_sync, ) try: From 0efe2936a72b4e606f82c0a6eb5a4aa8219e8c9b Mon Sep 17 00:00:00 2001 From: San <99511815+sanowl@users.noreply.github.com> Date: Fri, 21 Jun 2024 14:57:32 +0300 Subject: [PATCH 3/5] Implement YOLOv5 model with training pipeline - Add core YOLOv5 architecture (Conv, Bottleneck, C3, SPPF, Detect) - Implement custom loss function (YOLOLoss) - Add utility functions (bbox_iou, non_max_suppression, xywh2xyxy) - Create basic training loop and data loading structure - Set up main function with argument parsing for easy execution --- models/yolo.py | 826 +++++++++++++++++++++++-------------------------- 1 file changed, 383 insertions(+), 443 deletions(-) diff --git a/models/yolo.py b/models/yolo.py index fa898788f67d..3a9fb93fecfe 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -1,464 +1,404 @@ -# Ultralytics YOLOv5 🚀, AGPL-3.0 license -""" -YOLO-specific modules. 
+import torch +import torch.nn as nn +import torch.optim as optim +import torch.nn.functional as F +import numpy as np +from typing import List, Tuple, Optional +import math +import argparse -Usage: - $ python models/yolo.py --cfg yolov5s.yaml -""" +def autopad(k, p=None): + if p is None: + p = k // 2 if isinstance(k, int) else [x // 2 for x in k] + return p -import argparse -import contextlib -import math -import os -import platform -import sys -from copy import deepcopy -from pathlib import Path +class Conv(nn.Module): + def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): + super().__init__() + self.conv = nn.Conv2d(c1, c2, k, s, autopad(k, p), groups=g, bias=False) + self.bn = nn.BatchNorm2d(c2) + self.act = nn.SiLU() if act is True else (act if isinstance(act, nn.Module) else nn.Identity()) -import torch -import torch.nn as nn + def forward(self, x): + return self.act(self.bn(self.conv(x))) + +class Bottleneck(nn.Module): + def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): + super().__init__() + c_ = int(c2 * e) + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_, c2, 3, 1, g=g) + self.add = shortcut and c1 == c2 + + def forward(self, x): + return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + +class C3(nn.Module): + def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): + super().__init__() + c_ = int(c2 * e) + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c1, c_, 1, 1) + self.cv3 = Conv(2 * c_, c2, 1) + self.m = nn.Sequential(*[Bottleneck(c_, c_, shortcut, g, e=1.0) for _ in range(n)]) + + def forward(self, x): + return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) -FILE = Path(__file__).resolve() -ROOT = FILE.parents[1] # YOLOv5 root directory -if str(ROOT) not in sys.path: - sys.path.append(str(ROOT)) # add ROOT to PATH -if platform.system() != "Windows": - ROOT = Path(os.path.relpath(ROOT, Path.cwd())) # relative - -from models.common import ( - C3, C3SPP, C3TR, SPP, SPPF, Bottleneck, BottleneckCSP, C3Ghost, C3x, Classify, Concat, Contract, Conv, - CrossConv, DetectMultiBackend, DWConv, DWConvTranspose2d, Expand, Focus, GhostBottleneck, GhostConv, Proto, -) -from models.experimental import MixConv2d -from utils.autoanchor import check_anchor_order -from utils.general import LOGGER, check_version, check_yaml, colorstr, make_divisible, print_args -from utils.plots import feature_visualization -from utils.torch_utils import ( - fuse_conv_and_bn, initialize_weights, model_info, profile, scale_img, select_device, time_sync, -) - -try: - import thop # for FLOPs computation -except ImportError: - thop = None +class SPPF(nn.Module): + def __init__(self, c1, c2, k=5): + super().__init__() + c_ = c1 // 2 + self.cv1 = Conv(c1, c_, 1, 1) + self.cv2 = Conv(c_ * 4, c2, 1, 1) + self.m = nn.MaxPool2d(kernel_size=k, stride=1, padding=k // 2) + def forward(self, x): + x = self.cv1(x) + y1 = self.m(x) + y2 = self.m(y1) + return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1)) class Detect(nn.Module): - # YOLOv5 Detect head for detection models - stride = None # strides computed during build - dynamic = False # force grid reconstruction - export = False # export mode + stride = None + onnx_dynamic = False - def __init__(self, nc=80, anchors=(), ch=(), inplace=True): - """Initializes YOLOv5 detection layer with specified classes, anchors, channels, and inplace operations.""" + def __init__(self, nc=80, anchors=(), ch=()): super().__init__() - self.nc = nc # number of classes - self.no = nc + 5 # number of outputs per anchor - self.nl = 
len(anchors) # number of detection layers - self.na = len(anchors[0]) // 2 # number of anchors - self.grid = [torch.empty(0) for _ in range(self.nl)] # init grid - self.anchor_grid = [torch.empty(0) for _ in range(self.nl)] # init anchor grid - self.register_buffer("anchors", torch.tensor(anchors).float().view(self.nl, -1, 2)) # shape(nl,na,2) - self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv - self.inplace = inplace # use inplace ops (e.g. slice assignment) + self.nc = nc + self.no = nc + 5 + self.nl = len(anchors) + self.na = len(anchors[0]) // 2 + self.grid = [torch.zeros(1)] * self.nl + a = torch.tensor(anchors).float().view(self.nl, -1, 2) + self.register_buffer('anchors', a) + self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) + self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) def forward(self, x): - """Processes input through YOLOv5 layers, altering shape for detection: `x(bs, 3, ny, nx, 85)`.""" - z = [] # inference output + z = [] for i in range(self.nl): - x[i] = self.m[i](x[i]) # conv - bs, _, ny, nx = x[i].shape # x(bs,255,20,20) to x(bs,3,20,20,85) + x[i] = self.m[i](x[i]) + bs, _, ny, nx = x[i].shape x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous() - if not self.training: # inference - if self.dynamic or self.grid[i].shape[2:4] != x[i].shape[2:4]: - self.grid[i], self.anchor_grid[i] = self._make_grid(nx, ny, i) - - if isinstance(self, Segment): # (boxes + masks) - xy, wh, conf, mask = x[i].split((2, 2, self.nc + 1, self.no - self.nc - 5), 4) - xy = (xy.sigmoid() * 2 + self.grid[i]) * self.stride[i] # xy - wh = (wh.sigmoid() * 2) ** 2 * self.anchor_grid[i] # wh - y = torch.cat((xy, wh, conf.sigmoid(), mask), 4) - else: # Detect (boxes only) - xy, wh, conf = x[i].sigmoid().split((2, 2, self.nc + 1), 4) - xy = (xy * 2 + self.grid[i]) * self.stride[i] # xy - wh = (wh * 2) ** 2 * self.anchor_grid[i] # wh - y = torch.cat((xy, wh, conf), 4) - z.append(y.view(bs, self.na * nx * ny, self.no)) - - return x if self.training else (torch.cat(z, 1),) if self.export else (torch.cat(z, 1), x) - - def _make_grid(self, nx=20, ny=20, i=0, torch_1_10=check_version(torch.__version__, "1.10.0")): - """Generates a mesh grid for anchor boxes with optional compatibility for torch versions < 1.10.""" - d = self.anchors[i].device - t = self.anchors[i].dtype - shape = 1, self.na, ny, nx, 2 # grid shape - y, x = torch.arange(ny, device=d, dtype=t), torch.arange(nx, device=d, dtype=t) - yv, xv = torch.meshgrid(y, x, indexing="ij") if torch_1_10 else torch.meshgrid(y, x) # torch>=0.7 compatibility - grid = torch.stack((xv, yv), 2).expand(shape) - 0.5 # add grid offset, i.e. 
y = 2.0 * x - 0.5 - anchor_grid = (self.anchors[i] * self.stride[i]).view((1, self.na, 1, 1, 2)).expand(shape) - return grid, anchor_grid - - -class Segment(Detect): - # YOLOv5 Segment head for segmentation models - def __init__(self, nc=80, anchors=(), nm=32, npr=256, ch=(), inplace=True): - """Initializes YOLOv5 Segment head with options for mask count, protos, and channel adjustments.""" - super().__init__(nc, anchors, ch, inplace) - self.nm = nm # number of masks - self.npr = npr # number of protos - self.no = 5 + nc + self.nm # number of outputs per anchor - self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) # output conv - self.proto = Proto(ch[0], self.npr, self.nm) # protos - self.detect = Detect.forward + if not self.training: + if self.grid[i].shape[2:4] != x[i].shape[2:4] or self.onnx_dynamic: + self.grid[i] = self._make_grid(nx, ny).to(x[i].device) - def forward(self, x): - """Processes input through the network, returning detections and prototypes; adjusts output based on - training/export mode. - """ - p = self.proto(x[0]) - x = self.detect(self, x) - return (x, p) if self.training else (x[0], p) if self.export else (x[0], p, x[1]) - - -class BaseModel(nn.Module): - """YOLOv5 base model.""" - - def forward(self, x, profile=False, visualize=False): - """Executes a single-scale inference or training pass on the YOLOv5 base model, with options for profiling and - visualization. - """ - return self._forward_once(x, profile, visualize) # single-scale inference, train - - def _forward_once(self, x, profile=False, visualize=False): - """Performs a forward pass on the YOLOv5 model, enabling profiling and feature visualization options.""" - y, dt = [], [] # outputs - for m in self.model: - if m.f != -1: # if not from previous layer - x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f] # from earlier layers - if profile: - self._profile_one_layer(m, x, dt) - x = m(x) # run - y.append(x if m.i in self.save else None) # save output - if visualize: - feature_visualization(x, m.type, m.i, save_dir=visualize) - return x - - def _profile_one_layer(self, m, x, dt): - """Profiles a single layer's performance by computing GFLOPs, execution time, and parameters.""" - c = m == self.model[-1] # is final layer, copy input as inplace fix - o = thop.profile(m, inputs=(x.copy() if c else x,), verbose=False)[0] / 1e9 * 2 if thop else 0 # FLOPs - t = time_sync() - for _ in range(10): - m(x.copy() if c else x) - dt.append((time_sync() - t) * 100) - if m == self.model[0]: - LOGGER.info(f"{'time (ms)':>10s} {'GFLOPs':>10s} {'params':>10s} module") - LOGGER.info(f"{dt[-1]:10.2f} {o:10.2f} {m.np:10.0f} {m.type}") - if c: - LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s} Total") - - def fuse(self): - """Fuses Conv2d() and BatchNorm2d() layers in the model to improve inference speed.""" - LOGGER.info("Fusing layers... ") - for m in self.model.modules(): - if isinstance(m, (Conv, DWConv)) and hasattr(m, "bn"): - m.conv = fuse_conv_and_bn(m.conv, m.bn) # update conv - delattr(m, "bn") # remove batchnorm - m.forward = m.forward_fuse # update forward - self.info() - return self - - def info(self, verbose=False, img_size=640): - """Prints model information given verbosity and image size, e.g., `info(verbose=True, img_size=640)`.""" - model_info(self, verbose, img_size) - - def _apply(self, fn): - """Applies transformations like to(), cpu(), cuda(), half() to model tensors excluding parameters or registered - buffers. 
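
The `fuse()` method above speeds up inference by folding each BatchNorm2d into the Conv2d that precedes it, so the fused network runs one op where it previously ran two. A minimal sketch of the folding identity it relies on (illustrative only; `fuse_conv_bn_sketch` is a hypothetical name, not the repo's `fuse_conv_and_bn` helper):

import torch
import torch.nn as nn

def fuse_conv_bn_sketch(conv: nn.Conv2d, bn: nn.BatchNorm2d) -> nn.Conv2d:
    fused = nn.Conv2d(conv.in_channels, conv.out_channels, conv.kernel_size,
                      conv.stride, conv.padding, groups=conv.groups, bias=True)
    scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)  # gamma / sqrt(var + eps)
    fused.weight.data = conv.weight.data * scale.view(-1, 1, 1, 1)  # W' = W * scale
    b = conv.bias.data if conv.bias is not None else torch.zeros(conv.out_channels, device=conv.weight.device)
    fused.bias.data = bn.bias.data + (b - bn.running_mean) * scale  # b' = beta + (b - mean) * scale
    return fused

Both layers must be in eval mode (running statistics frozen) for the fused output to match the unfused one.
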
- """ - self = super()._apply(fn) - m = self.model[-1] # Detect() - if isinstance(m, (Detect, Segment)): - m.stride = fn(m.stride) - m.grid = list(map(fn, m.grid)) - if isinstance(m.anchor_grid, list): - m.anchor_grid = list(map(fn, m.anchor_grid)) - return self - - -class DetectionModel(BaseModel): - # YOLOv5 detection model - def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None): - """Initializes YOLOv5 model with configuration file, input channels, number of classes, and custom anchors.""" - super().__init__() - if isinstance(cfg, dict): - self.yaml = cfg # model dict - else: # is *.yaml - import yaml # for torch hub - - self.yaml_file = Path(cfg).name - with open(cfg, encoding="ascii", errors="ignore") as f: - self.yaml = yaml.safe_load(f) # model dict - - # Define model - ch = self.yaml["ch"] = self.yaml.get("ch", ch) # input channels - if nc and nc != self.yaml["nc"]: - LOGGER.info(f"Overriding model.yaml nc={self.yaml['nc']} with nc={nc}") - self.yaml["nc"] = nc # override yaml value - if anchors: - LOGGER.info(f"Overriding model.yaml anchors with anchors={anchors}") - self.yaml["anchors"] = round(anchors) # override yaml value - self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch]) # model, savelist - self.names = [str(i) for i in range(self.yaml["nc"])] # default names - self.inplace = self.yaml.get("inplace", True) - - # Build strides, anchors - m = self.model[-1] # Detect() - if isinstance(m, (Detect, Segment)): - - def _forward(x): - """Passes the input 'x' through the model and returns the processed output.""" - return self.forward(x)[0] if isinstance(m, Segment) else self.forward(x) - - s = 256 # 2x min stride - m.inplace = self.inplace - m.stride = torch.tensor([s / x.shape[-2] for x in _forward(torch.zeros(1, ch, s, s))]) # forward - check_anchor_order(m) - m.anchors /= m.stride.view(-1, 1, 1) - self.stride = m.stride - self._initialize_biases() # only run once - - # Init weights, biases - initialize_weights(self) - self.info() - LOGGER.info("") - - def forward(self, x, augment=False, profile=False, visualize=False): - """Performs single-scale or augmented inference and may include profiling or visualization.""" - if augment: - return self._forward_augment(x) # augmented inference, None - return self._forward_once(x, profile, visualize) # single-scale inference, train - - def _forward_augment(self, x): - """Performs augmented inference across different scales and flips, returning combined detections.""" - img_size = x.shape[-2:] # height, width - s = [1, 0.83, 0.67] # scales - f = [None, 3, None] # flips (2-ud, 3-lr) - y = [] # outputs - for si, fi in zip(s, f): - xi = scale_img(x.flip(fi) if fi else x, si, gs=int(self.stride.max())) - yi = self._forward_once(xi)[0] # forward - # cv2.imwrite(f'img_{si}.jpg', 255 * xi[0].cpu().numpy().transpose((1, 2, 0))[:, :, ::-1]) # save - yi = self._descale_pred(yi, fi, si, img_size) - y.append(yi) - y = self._clip_augmented(y) # clip augmented tails - return torch.cat(y, 1), None # augmented inference, train - - def _descale_pred(self, p, flips, scale, img_size): - """De-scales predictions from augmented inference, adjusting for flips and image size.""" - if self.inplace: - p[..., :4] /= scale # de-scale - if flips == 2: - p[..., 1] = img_size[0] - p[..., 1] # de-flip ud - elif flips == 3: - p[..., 0] = img_size[1] - p[..., 0] # de-flip lr - else: - x, y, wh = p[..., 0:1] / scale, p[..., 1:2] / scale, p[..., 2:4] / scale # de-scale - if flips == 2: - y = img_size[0] - y # de-flip ud - elif flips == 3: - x = 
img_size[1] - x # de-flip lr - p = torch.cat((x, y, wh, p[..., 4:]), -1) - return p - - def _clip_augmented(self, y): - """Clips augmented inference tails for YOLOv5 models, affecting first and last tensors based on grid points and - layer counts. - """ - nl = self.model[-1].nl # number of detection layers (P3-P5) - g = sum(4**x for x in range(nl)) # grid points - e = 1 # exclude layer count - i = (y[0].shape[1] // g) * sum(4**x for x in range(e)) # indices - y[0] = y[0][:, :-i] # large - i = (y[-1].shape[1] // g) * sum(4 ** (nl - 1 - x) for x in range(e)) # indices - y[-1] = y[-1][:, i:] # small - return y - - def _initialize_biases(self, cf=None): - """ - Initializes biases for YOLOv5's Detect() module, optionally using class frequencies (cf). - - For details see https://arxiv.org/abs/1708.02002 section 3.3. - """ - # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1. - m = self.model[-1] # Detect() module - for mi, s in zip(m.m, m.stride): # from - b = mi.bias.view(m.na, -1) # conv.bias(255) to (3,85) - b.data[:, 4] += math.log(8 / (640 / s) ** 2) # obj (8 objects per 640 image) - b.data[:, 5 : 5 + m.nc] += ( - math.log(0.6 / (m.nc - 0.99999)) if cf is None else torch.log(cf / cf.sum()) - ) # cls - mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True) - - -Model = DetectionModel # retain YOLOv5 'Model' class for backwards compatibility - - -class SegmentationModel(DetectionModel): - # YOLOv5 segmentation model - def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None): - """Initializes a YOLOv5 segmentation model with configurable params: cfg (str) for configuration, ch (int) for channels, nc (int) for num classes, anchors (list).""" - super().__init__(cfg, ch, nc, anchors) - - -class ClassificationModel(BaseModel): - # YOLOv5 classification model - def __init__(self, cfg=None, model=None, nc=1000, cutoff=10): - """Initializes YOLOv5 model with config file `cfg`, input channels `ch`, number of classes `nc`, and `cutoff` - index. - """ + y = x[i].sigmoid() + y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] + y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] + z.append(y.view(bs, -1, self.no)) + + return x if self.training else (torch.cat(z, 1), x) + + @staticmethod + def _make_grid(nx=20, ny=20): + yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) + return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() + +class Model(nn.Module): + def __init__(self, nc=80): super().__init__() - self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg) - - def _from_detection_model(self, model, nc=1000, cutoff=10): - """Creates a classification model from a YOLOv5 detection model, slicing at `cutoff` and adding a classification - layer. 
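
The magic numbers in `_initialize_biases()` follow the prior-probability trick from the cited focal loss paper: the objectness bias is set so the freshly initialized head predicts roughly 8 objects per 640x640 image rather than firing everywhere. A quick check of that arithmetic for the stride-8 output (a worked example, not repo code):

import math

s = 8                                 # stride of the P3 detection layer
cells = (640 / s) ** 2                # 80 x 80 = 6400 grid cells at 640x640
p_obj = 8 / cells                     # prior: ~8 objects per image -> p ~ 0.00125
bias = math.log(8 / (640 / s) ** 2)   # the exact expression used above, ~ -6.68
print(1 / (1 + math.exp(-bias)))      # sigmoid(bias) ~ 0.00125, matching the prior

The class term `math.log(0.6 / (m.nc - 0.99999))` plays the same role for the per-class logits.
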
- """ - if isinstance(model, DetectMultiBackend): - model = model.model # unwrap DetectMultiBackend - model.model = model.model[:cutoff] # backbone - m = model.model[-1] # last layer - ch = m.conv.in_channels if hasattr(m, "conv") else m.cv1.conv.in_channels # ch into module - c = Classify(ch, nc) # Classify() - c.i, c.f, c.type = m.i, m.f, "models.common.Classify" # index, from, type - model.model[-1] = c # replace - self.model = model.model - self.stride = model.stride - self.save = [] self.nc = nc - def _from_yaml(self, cfg): - """Creates a YOLOv5 classification model from a specified *.yaml configuration file.""" - self.model = None - - -def parse_model(d, ch): - """Parses a YOLOv5 model from a dict `d`, configuring layers based on input channels `ch` and model architecture.""" - LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10} {'module':<40}{'arguments':<30}") - anchors, nc, gd, gw, act, ch_mul = ( - d["anchors"], - d["nc"], - d["depth_multiple"], - d["width_multiple"], - d.get("activation"), - d.get("channel_multiple"), - ) - if act: - Conv.default_act = eval(act) # redefine default activation, i.e. Conv.default_act = nn.SiLU() - LOGGER.info(f"{colorstr('activation:')} {act}") # print - if not ch_mul: - ch_mul = 8 - na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors # number of anchors - no = na * (nc + 5) # number of outputs = anchors * (classes + 5) - - layers, save, c2 = [], [], ch[-1] # layers, savelist, ch out - for i, (f, n, m, args) in enumerate(d["backbone"] + d["head"]): # from, number, module, args - m = eval(m) if isinstance(m, str) else m # eval strings - for j, a in enumerate(args): - with contextlib.suppress(NameError): - args[j] = eval(a) if isinstance(a, str) else a # eval strings - - n = n_ = max(round(n * gd), 1) if n > 1 else n # depth gain - if m in { - Conv, - GhostConv, - Bottleneck, - GhostBottleneck, - SPP, - SPPF, - DWConv, - MixConv2d, - Focus, - CrossConv, - BottleneckCSP, - C3, - C3TR, - C3SPP, - C3Ghost, - nn.ConvTranspose2d, - DWConvTranspose2d, - C3x, - }: - c1, c2 = ch[f], args[0] - if c2 != no: # if not output - c2 = make_divisible(c2 * gw, ch_mul) - - args = [c1, c2, *args[1:]] - if m in {BottleneckCSP, C3, C3TR, C3Ghost, C3x}: - args.insert(2, n) # number of repeats - n = 1 - elif m is nn.BatchNorm2d: - args = [ch[f]] - elif m is Concat: - c2 = sum(ch[x] for x in f) - # TODO: channel, gw, gd - elif m in {Detect, Segment}: - args.append([ch[x] for x in f]) - if isinstance(args[1], int): # number of anchors - args[1] = [list(range(args[1] * 2))] * len(f) - if m is Segment: - args[3] = make_divisible(args[3] * gw, ch_mul) - elif m is Contract: - c2 = ch[f] * args[0] ** 2 - elif m is Expand: - c2 = ch[f] // args[0] ** 2 + # YOLOv5s architecture + self.model = nn.Sequential( + Conv(3, 32, 6, 2, 2), + Conv(32, 64, 3, 2), + C3(64, 64, 1), + Conv(64, 128, 3, 2), + C3(128, 128, 2), + Conv(128, 256, 3, 2), + C3(256, 256, 3), + Conv(256, 512, 3, 2), + C3(512, 512, 1), + SPPF(512, 512), + Conv(512, 256, 1, 1), + nn.Upsample(None, 2, 'nearest'), + C3(512, 256, 1, False), + Conv(256, 128, 1, 1), + nn.Upsample(None, 2, 'nearest'), + C3(256, 128, 1, False), + Conv(128, 128, 3, 2), + C3(256, 256, 1, False), + Conv(256, 256, 3, 2), + C3(512, 512, 1, False), + Detect(nc, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [128, 256, 512]) + ) + + self.stride = torch.tensor([8., 16., 32.]) + self.model[-1].stride = self.stride + + def forward(self, x): + return self.model(x) + +class YOLOLoss(nn.Module): + 
def __init__(self, nc=80, anchors=(), reduction='mean', device='cpu'): + super(YOLOLoss, self).__init__() + self.nc = nc + self.nl = len(anchors) + self.na = len(anchors[0]) // 2 + self.anchors = torch.tensor(anchors).float().view(self.nl, -1, 2).to(device) + self.reduction = reduction + + self.BCEcls = nn.BCEWithLogitsLoss(reduction=reduction) + self.BCEobj = nn.BCEWithLogitsLoss(reduction=reduction) + self.gr = 1.0 + self.box_gain = 0.05 + self.cls_gain = 0.5 + self.obj_gain = 1.0 + + def forward(self, p, targets): + lcls, lbox, lobj = torch.zeros(1, device=targets.device), torch.zeros(1, device=targets.device), torch.zeros(1, device=targets.device) + tcls, tbox, indices, anchors = self.build_targets(p, targets) + + for i, pi in enumerate(p): + b, a, gj, gi = indices[i] + tobj = torch.zeros_like(pi[..., 0], device=targets.device) + + n = b.shape[0] + if n: + ps = pi[b, a, gj, gi] + pxy = ps[:, :2].sigmoid() * 2. - 0.5 + pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] + pbox = torch.cat((pxy, pwh), 1) + iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) + lbox += (1.0 - iou).mean() + + tobj[b, a, gj, gi] = (1.0 - self.gr) + self.gr * iou.detach().clamp(0).type(tobj.dtype) + + if self.nc > 1: + t = torch.full_like(ps[:, 5:], 0.0, device=targets.device) + t[range(n), tcls[i]] = 1.0 + lcls += self.BCEcls(ps[:, 5:], t) + + lobj += self.BCEobj(pi[..., 4], tobj) * self.obj_gain + + lbox *= self.box_gain + lobj *= self.obj_gain + lcls *= self.cls_gain + bs = tobj.shape[0] + + loss = lbox + lobj + lcls + return loss * bs, torch.cat((lbox, lobj, lcls)).detach() + + def build_targets(self, p, targets): + na, nt = self.na, targets.shape[0] + tcls, tbox, indices, anch = [], [], [], [] + gain = torch.ones(7, device=targets.device) + ai = torch.arange(na, device=targets.device).float().view(na, 1).repeat(1, nt) + targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) + + g = 0.5 + off = torch.tensor([[0, 0], + [1, 0], [0, 1], [-1, 0], [0, -1], + [1, 1], [1, -1], [-1, 1], [-1, -1]], device=targets.device).float() * g + + for i in range(self.nl): + anchors = self.anchors[i] + gain[2:6] = torch.tensor(p[i].shape)[[3, 2, 3, 2]] + + t = targets * gain + if nt: + r = t[:, :, 4:6] / anchors[:, None] + j = torch.max(r, 1. / r).max(2)[0] < 4 + t = t[j] + + gxy = t[:, 2:4] + gxi = gain[[2, 3]] - gxy + j, k = ((gxy % 1. < g) & (gxy > 1.)).T + l, m = ((gxi % 1. 
< g) & (gxi > 1.)).T + j = torch.stack((torch.ones_like(j), j, k, l, m)) + t = t.repeat((5, 1, 1))[j] + offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] + else: + t = targets[0] + offsets = 0 + + b, c = t[:, :2].long().T + gxy = t[:, 2:4] + gwh = t[:, 4:6] + gij = (gxy - offsets).long() + gi, gj = gij.T + + a = t[:, 6].long() + indices.append((b, a, gj.clamp_(0, gain[3] - 1), gi.clamp_(0, gain[2] - 1))) + tbox.append(torch.cat((gxy - gij, gwh), 1)) + anch.append(anchors[a]) + tcls.append(c) + + return tcls, tbox, indices, anch + +def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): + box2 = box2.T + + if x1y1x2y2: + b1_x1, b1_y1, b1_x2, b1_y2 = box1[0], box1[1], box1[2], box1[3] + b2_x1, b2_y1, b2_x2, b2_y2 = box2[0], box2[1], box2[2], box2[3] + else: + b1_x1, b1_x2 = box1[0] - box1[2] / 2, box1[0] + box1[2] / 2 + b1_y1, b1_y2 = box1[1] - box1[3] / 2, box1[1] + box1[3] / 2 + b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 + b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 + + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ + (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) + + w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps + w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps + union = w1 * h1 + w2 * h2 - inter + eps + + iou = inter / union + + if GIoU or DIoU or CIoU: + cw = torch.max(b1_x2, b2_x2) - torch.min(b1_x1, b2_x1) + ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) + + if CIoU or DIoU: + c2 = cw ** 2 + ch ** 2 + eps + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 + if DIoU: + return iou - rho2 / c2 + elif CIoU: + v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) + with torch.no_grad(): + alpha = v / (v - iou + (1 + eps)) + return iou - (rho2 / c2 + v * alpha) + else: + c_area = cw * ch + eps + return iou - (c_area - union) / c_area + + return iou + +def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300): + nc = prediction.shape[2] - 5 + xc = prediction[..., 4] > conf_thres + + # Checks + assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' + assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' + + # Settings + min_wh, max_wh = 2, 4096 + max_nms = 30000 + time_limit = 10.0 + redundant = True + multi_label &= nc > 1 + merge = False + + t = time.time() + output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0] + for xi, x in enumerate(prediction): + x = x[xc[xi]] + + if not x.shape[0]: + continue + + x[:, 5:] *= x[:, 4:5] + + box = xywh2xyxy(x[:, :4]) + + if multi_label: + i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T + x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1) else: - c2 = ch[f] - - m_ = nn.Sequential(*(m(*args) for _ in range(n))) if n > 1 else m(*args) # module - t = str(m)[8:-2].replace("__main__.", "") # module type - np = sum(x.numel() for x in m_.parameters()) # number params - m_.i, m_.f, m_.type, m_.np = i, f, t, np # attach index, 'from' index, type, number params - LOGGER.info(f"{i:>3}{str(f):>18}{n_:>3}{np:10.0f} {t:<40}{str(args):<30}") # print - save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1) # append to savelist - layers.append(m_) - if i == 0: - ch = [] - ch.append(c2) - return nn.Sequential(*layers), sorted(save) - - -if __name__ == 
"__main__": + conf, j = x[:, 5:].max(1, keepdim=True) + x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres] + + if classes is not None: + x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)] + + n = x.shape[0] + if not n: + continue + elif n > max_nms: + x = x[x[:, 4].argsort(descending=True)[:max_nms]] + + c = x[:, 5:6] * (0 if agnostic else max_wh) + boxes, scores = x[:, :4] + c, x[:, 4] + i = torchvision.ops.nms(boxes, scores, iou_thres) + if i.shape[0] > max_det: + i = i[:max_det] + if merge and (1 < n < 3E3): + iou = box_iou(boxes[i], boxes) > iou_thres + weights = iou * scores[None] + x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) + if redundant: + i = i[iou.sum(1) > 1] + + output[xi] = x[i] + if (time.time() - t) > time_limit: + print(f'WARNING: NMS time limit {time_limit}s exceeded') + break + + return output + +def xywh2xyxy(x): + y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) + y[:, 0] = x[:, 0] - x[:, 2] / 2 + y[:, 1] = x[:, 1] - x[:, 3] / 2 + y[:, 2] = x[:, 0] + x[:, 2] / 2 + y[:, 3] = x[:, 1] + x[:, 3] / 2 + return y + +class DataLoader: + def __init__(self, path, img_size=640, batch_size=16): + self.path = path + self.img_size = img_size + self.batch_size = batch_size + self.augment = True + self.hyp = {'hsv_h': 0.015, 'hsv_s': 0.7, 'hsv_v': 0.4, 'degrees': 0, 'translate': 0.1, 'scale': 0.5, 'shear': 0.0} + # Load data from path and prepare it + + def __iter__(self): + # Yield batches of data + pass + +def train(model, dataloader, optimizer, epochs): + device = next(model.parameters()).device + criterion = YOLOLoss(model.nc, model.model[-1].anchors, reduction='mean', device=device) + + for epoch in range(epochs): + model.train() + for batch_i, (imgs, targets, paths, _) in enumerate(dataloader): + imgs = imgs.to(device, non_blocking=True).float() / 255.0 + targets = targets.to(device) + + pred = model(imgs) + loss, loss_items = criterion(pred, targets) + + loss.backward() + optimizer.step() + optimizer.zero_grad() + + if batch_i % 10 == 0: + print(f"Epoch {epoch}/{epochs}, Batch {batch_i}/{len(dataloader)}, Loss: {loss.item():.4f}") + +def main(): parser = argparse.ArgumentParser() - parser.add_argument("--cfg", type=str, default="yolov5s.yaml", help="model.yaml") - parser.add_argument("--batch-size", type=int, default=1, help="total batch size for all GPUs") - parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu") - parser.add_argument("--profile", action="store_true", help="profile model speed") - parser.add_argument("--line-profile", action="store_true", help="profile model speed layer by layer") - parser.add_argument("--test", action="store_true", help="test all yolo*.yaml") + parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') + parser.add_argument('--epochs', type=int, default=300) + parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs') + parser.add_argument('--img-size', type=int, default=640, help='train, test image sizes') + parser.add_argument('--device', default='', help='cuda device, i.e. 
0 or 0,1,2,3 or cpu') opt = parser.parse_args() - opt.cfg = check_yaml(opt.cfg) # check YAML - print_args(vars(opt)) - device = select_device(opt.device) - - # Create model - im = torch.rand(opt.batch_size, 3, 640, 640).to(device) - model = Model(opt.cfg).to(device) - - # Options - if opt.line_profile: # profile layer by layer - model(im, profile=True) - - elif opt.profile: # profile forward-backward - results = profile(input=im, ops=[model], n=3) - - elif opt.test: # test all models - for cfg in Path(ROOT / "models").rglob("yolo*.yaml"): - try: - _ = Model(cfg) - except Exception as e: - print(f"Error in {cfg}: {e}") - - else: # report fused model summary - model.fuse() + + device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') + + # Initialize model + model = Model(nc=80).to(device) + + # Initialize optimizer + optimizer = optim.Adam(model.parameters(), lr=0.01) + + # Initialize dataloader + dataloader = DataLoader(opt.data, img_size=opt.img_size, batch_size=opt.batch_size) + + # Train the model + train(model, dataloader, optimizer, opt.epochs) + +if __name__ == '__main__': + main() \ No newline at end of file From 76f541d0926f6f90e5d62ae38911802603704858 Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Fri, 21 Jun 2024 12:00:02 +0000 Subject: [PATCH 4/5] Auto-format by https://ultralytics.com/actions --- models/yolo.py | 127 +++++++++++++++++++++++++++++++------------------ 1 file changed, 81 insertions(+), 46 deletions(-) diff --git a/models/yolo.py b/models/yolo.py index 3a9fb93fecfe..ad62fd5b6f10 100644 --- a/models/yolo.py +++ b/models/yolo.py @@ -1,17 +1,18 @@ +import argparse +import math + +import numpy as np import torch import torch.nn as nn import torch.optim as optim -import torch.nn.functional as F -import numpy as np -from typing import List, Tuple, Optional -import math -import argparse + def autopad(k, p=None): if p is None: p = k // 2 if isinstance(k, int) else [x // 2 for x in k] return p + class Conv(nn.Module): def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): super().__init__() @@ -22,6 +23,7 @@ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True): def forward(self, x): return self.act(self.bn(self.conv(x))) + class Bottleneck(nn.Module): def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): super().__init__() @@ -33,6 +35,7 @@ def __init__(self, c1, c2, shortcut=True, g=1, e=0.5): def forward(self, x): return x + self.cv2(self.cv1(x)) if self.add else self.cv2(self.cv1(x)) + class C3(nn.Module): def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): super().__init__() @@ -45,6 +48,7 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5): def forward(self, x): return self.cv3(torch.cat((self.m(self.cv1(x)), self.cv2(x)), dim=1)) + class SPPF(nn.Module): def __init__(self, c1, c2, k=5): super().__init__() @@ -59,6 +63,7 @@ def forward(self, x): y2 = self.m(y1) return self.cv2(torch.cat([x, y1, y2, self.m(y2)], 1)) + class Detect(nn.Module): stride = None onnx_dynamic = False @@ -71,8 +76,8 @@ def __init__(self, nc=80, anchors=(), ch=()): self.na = len(anchors[0]) // 2 self.grid = [torch.zeros(1)] * self.nl a = torch.tensor(anchors).float().view(self.nl, -1, 2) - self.register_buffer('anchors', a) - self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2)) + self.register_buffer("anchors", a) + self.register_buffer("anchor_grid", a.clone().view(self.nl, 1, -1, 1, 1, 2)) self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch) def forward(self, x): @@ -87,7 +92,7 @@ def 
forward(self, x): self.grid[i] = self._make_grid(nx, ny).to(x[i].device) y = x[i].sigmoid() - y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + self.grid[i]) * self.stride[i] + y[..., 0:2] = (y[..., 0:2] * 2.0 - 0.5 + self.grid[i]) * self.stride[i] y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * self.anchor_grid[i] z.append(y.view(bs, -1, self.no)) @@ -98,6 +103,7 @@ def _make_grid(nx=20, ny=20): yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)]) return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float() + class Model(nn.Module): def __init__(self, nc=80): super().__init__() @@ -116,26 +122,31 @@ def __init__(self, nc=80): C3(512, 512, 1), SPPF(512, 512), Conv(512, 256, 1, 1), - nn.Upsample(None, 2, 'nearest'), + nn.Upsample(None, 2, "nearest"), C3(512, 256, 1, False), Conv(256, 128, 1, 1), - nn.Upsample(None, 2, 'nearest'), + nn.Upsample(None, 2, "nearest"), C3(256, 128, 1, False), Conv(128, 128, 3, 2), C3(256, 256, 1, False), Conv(256, 256, 3, 2), C3(512, 512, 1, False), - Detect(nc, [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], [128, 256, 512]) + Detect( + nc, + [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], + [128, 256, 512], + ), ) - self.stride = torch.tensor([8., 16., 32.]) + self.stride = torch.tensor([8.0, 16.0, 32.0]) self.model[-1].stride = self.stride def forward(self, x): return self.model(x) + class YOLOLoss(nn.Module): - def __init__(self, nc=80, anchors=(), reduction='mean', device='cpu'): + def __init__(self, nc=80, anchors=(), reduction="mean", device="cpu"): super(YOLOLoss, self).__init__() self.nc = nc self.nl = len(anchors) @@ -151,7 +162,11 @@ def __init__(self, nc=80, anchors=(), reduction='mean', device='cpu'): self.obj_gain = 1.0 def forward(self, p, targets): - lcls, lbox, lobj = torch.zeros(1, device=targets.device), torch.zeros(1, device=targets.device), torch.zeros(1, device=targets.device) + lcls, lbox, lobj = ( + torch.zeros(1, device=targets.device), + torch.zeros(1, device=targets.device), + torch.zeros(1, device=targets.device), + ) tcls, tbox, indices, anchors = self.build_targets(p, targets) for i, pi in enumerate(p): @@ -161,7 +176,7 @@ def forward(self, p, targets): n = b.shape[0] if n: ps = pi[b, a, gj, gi] - pxy = ps[:, :2].sigmoid() * 2. - 0.5 + pxy = ps[:, :2].sigmoid() * 2.0 - 0.5 pwh = (ps[:, 2:4].sigmoid() * 2) ** 2 * anchors[i] pbox = torch.cat((pxy, pwh), 1) iou = bbox_iou(pbox.T, tbox[i], x1y1x2y2=False, CIoU=True) @@ -192,9 +207,12 @@ def build_targets(self, p, targets): targets = torch.cat((targets.repeat(na, 1, 1), ai[:, :, None]), 2) g = 0.5 - off = torch.tensor([[0, 0], - [1, 0], [0, 1], [-1, 0], [0, -1], - [1, 1], [1, -1], [-1, 1], [-1, -1]], device=targets.device).float() * g + off = ( + torch.tensor( + [[0, 0], [1, 0], [0, 1], [-1, 0], [0, -1], [1, 1], [1, -1], [-1, 1], [-1, -1]], device=targets.device + ).float() + * g + ) for i in range(self.nl): anchors = self.anchors[i] @@ -203,13 +221,13 @@ def build_targets(self, p, targets): t = targets * gain if nt: r = t[:, :, 4:6] / anchors[:, None] - j = torch.max(r, 1. / r).max(2)[0] < 4 + j = torch.max(r, 1.0 / r).max(2)[0] < 4 t = t[j] gxy = t[:, 2:4] gxi = gain[[2, 3]] - gxy - j, k = ((gxy % 1. < g) & (gxy > 1.)).T - l, m = ((gxi % 1. 
< g) & (gxi > 1.)).T + j, k = ((gxy % 1.0 < g) & (gxy > 1.0)).T + l, m = ((gxi % 1.0 < g) & (gxi > 1.0)).T j = torch.stack((torch.ones_like(j), j, k, l, m)) t = t.repeat((5, 1, 1))[j] offsets = (torch.zeros_like(gxy)[None] + off[:, None])[j] @@ -231,6 +249,7 @@ def build_targets(self, p, targets): return tcls, tbox, indices, anch + def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7): box2 = box2.T @@ -243,8 +262,9 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= b2_x1, b2_x2 = box2[0] - box2[2] / 2, box2[0] + box2[2] / 2 b2_y1, b2_y2 = box2[1] - box2[3] / 2, box2[1] + box2[3] / 2 - inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * \ - (torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1)).clamp(0) + inter = (torch.min(b1_x2, b2_x2) - torch.max(b1_x1, b2_x1)).clamp(0) * ( + torch.min(b1_y2, b2_y2) - torch.max(b1_y1, b2_y1) + ).clamp(0) w1, h1 = b1_x2 - b1_x1, b1_y2 - b1_y1 + eps w2, h2 = b2_x2 - b2_x1, b2_y2 - b2_y1 + eps @@ -257,13 +277,12 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= ch = torch.max(b1_y2, b2_y2) - torch.min(b1_y1, b2_y1) if CIoU or DIoU: - c2 = cw ** 2 + ch ** 2 + eps - rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + - (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 + c2 = cw**2 + ch**2 + eps + rho2 = ((b2_x1 + b2_x2 - b1_x1 - b1_x2) ** 2 + (b2_y1 + b2_y2 - b1_y1 - b1_y2) ** 2) / 4 if DIoU: return iou - rho2 / c2 elif CIoU: - v = (4 / math.pi ** 2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) + v = (4 / math.pi**2) * torch.pow(torch.atan(w2 / h2) - torch.atan(w1 / h1), 2) with torch.no_grad(): alpha = v / (v - iou + (1 + eps)) return iou - (rho2 / c2 + v * alpha) @@ -273,13 +292,16 @@ def bbox_iou(box1, box2, x1y1x2y2=True, GIoU=False, DIoU=False, CIoU=False, eps= return iou -def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300): + +def non_max_suppression( + prediction, conf_thres=0.25, iou_thres=0.45, classes=None, agnostic=False, multi_label=False, max_det=300 +): nc = prediction.shape[2] - 5 xc = prediction[..., 4] > conf_thres # Checks - assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0' - assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0' + assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" + assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" # Settings min_wh, max_wh = 2, 4096 @@ -322,7 +344,7 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non i = torchvision.ops.nms(boxes, scores, iou_thres) if i.shape[0] > max_det: i = i[:max_det] - if merge and (1 < n < 3E3): + if merge and (1 < n < 3e3): iou = box_iou(boxes[i], boxes) > iou_thres weights = iou * scores[None] x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) @@ -331,11 +353,12 @@ def non_max_suppression(prediction, conf_thres=0.25, iou_thres=0.45, classes=Non output[xi] = x[i] if (time.time() - t) > time_limit: - print(f'WARNING: NMS time limit {time_limit}s exceeded') + print(f"WARNING: NMS time limit {time_limit}s exceeded") break return output + def xywh2xyxy(x): y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x) y[:, 0] = x[:, 0] - x[:, 2] / 2 @@ -344,22 +367,32 @@ def xywh2xyxy(x): y[:, 3] = x[:, 1] + x[:, 3] / 2 return y + class 
DataLoader: def __init__(self, path, img_size=640, batch_size=16): self.path = path self.img_size = img_size self.batch_size = batch_size self.augment = True - self.hyp = {'hsv_h': 0.015, 'hsv_s': 0.7, 'hsv_v': 0.4, 'degrees': 0, 'translate': 0.1, 'scale': 0.5, 'shear': 0.0} + self.hyp = { + "hsv_h": 0.015, + "hsv_s": 0.7, + "hsv_v": 0.4, + "degrees": 0, + "translate": 0.1, + "scale": 0.5, + "shear": 0.0, + } # Load data from path and prepare it def __iter__(self): # Yield batches of data pass + def train(model, dataloader, optimizer, epochs): device = next(model.parameters()).device - criterion = YOLOLoss(model.nc, model.model[-1].anchors, reduction='mean', device=device) + criterion = YOLOLoss(model.nc, model.model[-1].anchors, reduction="mean", device=device) for epoch in range(epochs): model.train() @@ -377,28 +410,30 @@ def train(model, dataloader, optimizer, epochs): if batch_i % 10 == 0: print(f"Epoch {epoch}/{epochs}, Batch {batch_i}/{len(dataloader)}, Loss: {loss.item():.4f}") + def main(): parser = argparse.ArgumentParser() - parser.add_argument('--data', type=str, default='data/coco128.yaml', help='data.yaml path') - parser.add_argument('--epochs', type=int, default=300) - parser.add_argument('--batch-size', type=int, default=16, help='total batch size for all GPUs') - parser.add_argument('--img-size', type=int, default=640, help='train, test image sizes') - parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu') + parser.add_argument("--data", type=str, default="data/coco128.yaml", help="data.yaml path") + parser.add_argument("--epochs", type=int, default=300) + parser.add_argument("--batch-size", type=int, default=16, help="total batch size for all GPUs") + parser.add_argument("--img-size", type=int, default=640, help="train, test image sizes") + parser.add_argument("--device", default="", help="cuda device, i.e. 0 or 0,1,2,3 or cpu") opt = parser.parse_args() - device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu') - + device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") + # Initialize model model = Model(nc=80).to(device) - + # Initialize optimizer optimizer = optim.Adam(model.parameters(), lr=0.01) - + # Initialize dataloader dataloader = DataLoader(opt.data, img_size=opt.img_size, batch_size=opt.batch_size) - + # Train the model train(model, dataloader, optimizer, opt.epochs) -if __name__ == '__main__': - main() \ No newline at end of file + +if __name__ == "__main__": + main() From 0ed15c728201d392dd1a4b06f12728225850c093 Mon Sep 17 00:00:00 2001 From: UltralyticsAssistant Date: Sat, 24 Aug 2024 21:41:49 +0000 Subject: [PATCH 5/5] Auto-format by https://ultralytics.com/actions --- export.py | 1 + utils/augmentations.py | 1 - utils/callbacks.py | 1 - utils/dataloaders.py | 7 ++++--- utils/general.py | 2 -- utils/loggers/__init__.py | 3 ++- utils/loggers/clearml/clearml_utils.py | 14 +++++++------- utils/loggers/wandb/wandb_utils.py | 12 ++++++------ utils/metrics.py | 8 +++----- utils/segment/augmentations.py | 1 - utils/segment/general.py | 3 --- utils/triton.py | 3 +-- 12 files changed, 24 insertions(+), 32 deletions(-) diff --git a/export.py b/export.py index dfb1c06fb5e2..f3216a564290 100644 --- a/export.py +++ b/export.py @@ -449,6 +449,7 @@ def transform_fn(data_item): Quantization transform function. Extracts and preprocess input data from dataloader item for quantization. 
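
For the `transform_fn` documented in this export.py hunk, a calibration transform only needs to strip a dataloader item down to the preprocessed image tensor. A hedged sketch of such a function (assuming the usual YOLOv5 item layout of `(im, targets, paths, shapes)`; `transform_fn_sketch` is an illustrative name, not the function in export.py):

import numpy as np

def transform_fn_sketch(data_item):
    im = data_item[0].numpy().astype(np.float32)  # image tensor from (im, targets, paths, shapes)
    im /= 255.0                                   # uint8 0-255 -> float 0.0-1.0, matching training
    return np.expand_dims(im, 0) if im.ndim == 3 else im  # ensure a batch dimension
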
+ Parameters: data_item: Tuple with data item produced by DataLoader during iteration Returns: diff --git a/utils/augmentations.py b/utils/augmentations.py index 4a6e441d7c45..bdbe07712716 100644 --- a/utils/augmentations.py +++ b/utils/augmentations.py @@ -156,7 +156,6 @@ def random_perspective( ): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(0.1, 0.1), scale=(0.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] - """Applies random perspective transformation to an image, modifying the image and corresponding labels.""" height = im.shape[0] + border[0] * 2 # shape(h,w,c) width = im.shape[1] + border[1] * 2 diff --git a/utils/callbacks.py b/utils/callbacks.py index 0a0bcbdb2b96..21c587bd74c6 100644 --- a/utils/callbacks.py +++ b/utils/callbacks.py @@ -64,7 +64,6 @@ def run(self, hook, *args, thread=False, **kwargs): thread: (boolean) Run callbacks in daemon thread kwargs: Keyword Arguments to receive from YOLOv5 """ - assert hook in self._callbacks, f"hook '{hook}' not found in callbacks {self._callbacks}" for logger in self._callbacks[hook]: if thread: diff --git a/utils/dataloaders.py b/utils/dataloaders.py index 21308f0cedbd..bdeffec465e7 100644 --- a/utils/dataloaders.py +++ b/utils/dataloaders.py @@ -1104,7 +1104,8 @@ def extract_boxes(path=DATASETS_DIR / "coco128"): def autosplit(path=DATASETS_DIR / "coco128/images", weights=(0.9, 0.1, 0.0), annotated_only=False): """Autosplit a dataset into train/val/test splits and save path/autosplit_*.txt files Usage: from utils.dataloaders import *; autosplit() - Arguments + + Arguments: path: Path to images directory weights: Train, val, test weights (list, tuple) annotated_only: Only use images with an annotated txt file @@ -1183,7 +1184,7 @@ class HUBDatasetStats: """ Class for generating HUB dataset JSON and `-hub` dataset directory. - Arguments + Arguments: path: Path to data.yaml or data.zip (with data.yaml inside data.zip) autodownload: Attempt to download dataset if not found locally @@ -1314,7 +1315,7 @@ class ClassificationDataset(torchvision.datasets.ImageFolder): """ YOLOv5 Classification Dataset. - Arguments + Arguments: root: Dataset path transform: torchvision transforms, used by default album_transform: Albumentations transforms, used if installed diff --git a/utils/general.py b/utils/general.py index e311504b3031..57db68a7ac76 100644 --- a/utils/general.py +++ b/utils/general.py @@ -518,7 +518,6 @@ def check_font(font=FONT, progress=False): def check_dataset(data, autodownload=True): """Validates and/or auto-downloads a dataset, returning its configuration as a dictionary.""" - # Download (optional) extract_dir = "" if isinstance(data, (str, Path)) and (is_zipfile(data) or is_tarfile(data)): @@ -1023,7 +1022,6 @@ def non_max_suppression( Returns: list of detections, on (n,6) tensor per image [xyxy, conf, cls] """ - # Checks assert 0 <= conf_thres <= 1, f"Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0" assert 0 <= iou_thres <= 1, f"Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0" diff --git a/utils/loggers/__init__.py b/utils/loggers/__init__.py index 2bd8583d2ade..7051e8da0a29 100644 --- a/utils/loggers/__init__.py +++ b/utils/loggers/__init__.py @@ -350,7 +350,8 @@ class GenericLogger: """ YOLOv5 General purpose logger for non-task specific logging Usage: from utils.loggers import GenericLogger; logger = GenericLogger(...) 
- Arguments + + Arguments: opt: Run arguments console_logger: Console logger include: loggers to include diff --git a/utils/loggers/clearml/clearml_utils.py b/utils/loggers/clearml/clearml_utils.py index 2b5351ef8533..de4129e08a16 100644 --- a/utils/loggers/clearml/clearml_utils.py +++ b/utils/loggers/clearml/clearml_utils.py @@ -80,7 +80,7 @@ def __init__(self, opt, hyp): - Initialize ClearML Task, this object will capture the experiment - Upload dataset version to ClearML Data if opt.upload_dataset is True - arguments: + Arguments: opt (namespace) -- Commandline arguments for this run hyp (dict) -- Hyperparameters for this run @@ -133,7 +133,7 @@ def log_scalars(self, metrics, epoch): """ Log scalars/metrics to ClearML. - arguments: + Arguments: metrics (dict) Metrics in dict format: {"metrics/mAP": 0.8, ...} epoch (int) iteration number for the current set of metrics """ @@ -145,7 +145,7 @@ def log_model(self, model_path, model_name, epoch=0): """ Log model weights to ClearML. - arguments: + Arguments: model_path (PosixPath or str) Path to the model weights model_name (str) Name of the model visible in ClearML epoch (int) Iteration / epoch of the model weights @@ -158,7 +158,7 @@ def log_summary(self, metrics): """ Log final metrics to a summary table. - arguments: + Arguments: metrics (dict) Metrics in dict format: {"metrics/mAP": 0.8, ...} """ for k, v in metrics.items(): @@ -168,7 +168,7 @@ def log_plot(self, title, plot_path): """ Log image as plot in the plot section of ClearML. - arguments: + Arguments: title (str) Title of the plot plot_path (PosixPath or str) Path to the saved image file """ @@ -183,7 +183,7 @@ def log_debug_samples(self, files, title="Debug Samples"): """ Log files (images) as debug samples in the ClearML task. - arguments: + Arguments: files (List(PosixPath)) a list of file paths in PosixPath format title (str) A title that groups together images with the same values """ @@ -199,7 +199,7 @@ def log_image_with_boxes(self, image_path, boxes, class_names, image, conf_thres """ Draw the bounding boxes on a single image and report the result as a ClearML debug sample. - arguments: + Arguments: image_path (PosixPath) the path the original image file boxes (list): list of scaled predictions in the format - [xmin, ymin, xmax, ymax, confidence, class] class_names (dict): dict containing mapping of class int to class name diff --git a/utils/loggers/wandb/wandb_utils.py b/utils/loggers/wandb/wandb_utils.py index 930f2c7543af..6a32c8cc7b03 100644 --- a/utils/loggers/wandb/wandb_utils.py +++ b/utils/loggers/wandb/wandb_utils.py @@ -49,7 +49,7 @@ def __init__(self, opt, run_id=None, job_type="Training"): - Upload dataset if opt.upload_dataset is True - Setup training processes if job_type is 'Training' - arguments: + Arguments: opt (namespace) -- Commandline arguments for this run run_id (str) -- Run ID of W&B run to be resumed job_type (str) -- To set the job_type for this run @@ -90,7 +90,7 @@ def setup_training(self, opt): - Update data_dict, to contain info of previous run if resumed and the paths of dataset artifact if downloaded - Setup log_dict, initialize bbox_interval - arguments: + Arguments: opt (namespace) -- commandline arguments for this run """ @@ -120,7 +120,7 @@ def log_model(self, path, opt, epoch, fitness_score, best_model=False): """ Log the model checkpoint as W&B artifact. 
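
These logger docstrings describe an accumulate-then-commit pattern: `log()` only stages values in a dictionary, and `end_epoch()` sends them in a single step so every metric for an epoch lands in one W&B commit. A minimal sketch of that pattern (generic, not the repo's WandbLogger):

class EpochLogger:
    def __init__(self):
        self.log_dict = {}

    def log(self, metrics: dict):
        self.log_dict.update(metrics)  # stage metrics; nothing is sent yet

    def end_epoch(self, backend):
        backend.log(self.log_dict)     # one commit per epoch, e.g. wandb.log(...)
        self.log_dict = {}             # flush for the next epoch
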
- arguments: + Arguments: path (Path) -- Path of directory containing the checkpoints opt (namespace) -- Command line arguments for this run epoch (int) -- Current epoch number @@ -159,7 +159,7 @@ def log(self, log_dict): """ Save the metrics to the logging dictionary. - arguments: + Arguments: log_dict (Dict) -- metrics/media to be logged in current step """ if self.wandb_run: @@ -170,7 +170,7 @@ def end_epoch(self): """ Commit the log_dict, model artifacts and Tables to W&B and flush the log_dict. - arguments: + Arguments: best_result (boolean): Boolean representing if the result of this evaluation is best or not """ if self.wandb_run: @@ -197,7 +197,7 @@ def finish_run(self): @contextmanager def all_logging_disabled(highest_level=logging.CRITICAL): - """source - https://gist.github.com/simon-weber/7853144 + """Source - https://gist.github.com/simon-weber/7853144 A context manager that will prevent any logging messages triggered during the body from being processed. :param highest_level: the maximum logging level in use. This would only need to be changed if a custom level greater than CRITICAL is defined. diff --git a/utils/metrics.py b/utils/metrics.py index 385fdc471748..9acc38591f96 100644 --- a/utils/metrics.py +++ b/utils/metrics.py @@ -41,7 +41,6 @@ def ap_per_class(tp, conf, pred_cls, target_cls, plot=False, save_dir=".", names # Returns The average precision as computed in py-faster-rcnn. """ - # Sort by objectness i = np.argsort(-conf) tp, conf, pred_cls = tp[i], conf[i], pred_cls[i] @@ -103,7 +102,6 @@ def compute_ap(recall, precision): # Returns Average precision, precision curve, recall curve """ - # Append sentinel values to beginning and end mrec = np.concatenate(([0.0], recall, [1.0])) mpre = np.concatenate(([1.0], precision, [0.0])) @@ -137,6 +135,7 @@ def process_batch(self, detections, labels): Return intersection-over-union (Jaccard index) of boxes. Both sets of boxes are expected to be in (x1, y1, x2, y2) format. + Arguments: detections (Array[N, 6]), x1, y1, x2, y2, conf, class labels (Array[M, 5]), class, x1, y1, x2, y2 @@ -233,7 +232,6 @@ def bbox_iou(box1, box2, xywh=True, GIoU=False, DIoU=False, CIoU=False, eps=1e-7 Input shapes are box1(1,4) to box2(n,4). """ - # Get the coordinates of bounding boxes if xywh: # transform from xywh to xyxy (x1, y1, w1, h1), (x2, y2, w2, h2) = box1.chunk(4, -1), box2.chunk(4, -1) @@ -279,14 +277,15 @@ def box_iou(box1, box2, eps=1e-7): Return intersection-over-union (Jaccard index) of boxes. Both sets of boxes are expected to be in (x1, y1, x2, y2) format. 
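
The sentinel values concatenated in `compute_ap` exist so that the precision envelope and the recall axis are well defined at both ends of the curve. The whole computation fits in a few lines (a sketch of the standard method; the repo's version also offers 101-point COCO-style interpolation):

import numpy as np

def compute_ap_sketch(recall, precision):
    mrec = np.concatenate(([0.0], recall, [1.0]))           # sentinel recalls
    mpre = np.concatenate(([1.0], precision, [0.0]))        # sentinel precisions
    mpre = np.flip(np.maximum.accumulate(np.flip(mpre)))    # monotone precision envelope
    i = np.where(mrec[1:] != mrec[:-1])[0]                  # points where recall changes
    return np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])    # area under the PR curve
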
+ Arguments: box1 (Tensor[N, 4]) box2 (Tensor[M, 4]) + Returns: iou (Tensor[N, M]): the NxM matrix containing the pairwise IoU values for every element in boxes1 and boxes2 """ - # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2) (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2) inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2) @@ -304,7 +303,6 @@ def bbox_ioa(box1, box2, eps=1e-7): box2: np.array of shape(nx4) returns: np.array of shape(n) """ - # Get the coordinates of bounding boxes b1_x1, b1_y1, b1_x2, b1_y2 = box1 b2_x1, b2_y1, b2_x2, b2_y2 = box2.T diff --git a/utils/segment/augmentations.py b/utils/segment/augmentations.py index d7dd8aec6691..2e1dca1198b0 100644 --- a/utils/segment/augmentations.py +++ b/utils/segment/augmentations.py @@ -29,7 +29,6 @@ def random_perspective( ): # torchvision.transforms.RandomAffine(degrees=(-10, 10), translate=(.1, .1), scale=(.9, 1.1), shear=(-10, 10)) # targets = [cls, xyxy] - """Applies random perspective, rotation, scale, shear, and translation augmentations to an image and targets.""" height = im.shape[0] + border[0] * 2 # shape(h,w,c) width = im.shape[1] + border[1] * 2 diff --git a/utils/segment/general.py b/utils/segment/general.py index 2f65d60238dd..0793470a95e4 100644 --- a/utils/segment/general.py +++ b/utils/segment/general.py @@ -14,7 +14,6 @@ def crop_mask(masks, boxes): - masks should be a size [n, h, w] tensor of masks - boxes should be a size [n, 4] tensor of bbox coords in relative point form """ - n, h, w = masks.shape x1, y1, x2, y2 = torch.chunk(boxes[:, :, None], 4, 1) # x1 shape(1,1,n) r = torch.arange(w, device=masks.device, dtype=x1.dtype)[None, None, :] # rows shape(1,w,1) @@ -33,7 +32,6 @@ def process_mask_upsample(protos, masks_in, bboxes, shape): return: h, w, n """ - c, mh, mw = protos.shape # CHW masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) masks = F.interpolate(masks[None], shape, mode="bilinear", align_corners=False)[0] # CHW @@ -51,7 +49,6 @@ def process_mask(protos, masks_in, bboxes, shape, upsample=False): return: h, w, n """ - c, mh, mw = protos.shape # CHW ih, iw = shape masks = (masks_in @ protos.float().view(c, -1)).sigmoid().view(-1, mh, mw) # CHW diff --git a/utils/triton.py b/utils/triton.py index 3d529ec88a07..2fee42815517 100644 --- a/utils/triton.py +++ b/utils/triton.py @@ -17,10 +17,9 @@ class TritonRemoteModel: def __init__(self, url: str): """ - Keyword arguments: + Keyword Arguments: url: Fully qualified address of the Triton server - for e.g. grpc://localhost:8000 """ - parsed_url = urlparse(url) if parsed_url.scheme == "grpc": from tritonclient.grpc import InferenceServerClient, InferInput
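
One loose end in PATCH 3/5 above: the rewritten models/yolo.py calls `time.time()` and `torchvision.ops.nms()` inside `non_max_suppression` and uses a `box_iou()` helper in the merge branch, but never imports or defines any of them, so NMS would raise NameError at runtime. A minimal completion (the `box_iou` body follows the pairwise-IoU formulation visible in the utils/metrics.py hunk above):

import time         # used by the NMS time limit
import torch
import torchvision  # used for torchvision.ops.nms

def box_iou(box1, box2, eps=1e-7):
    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    (a1, a2), (b1, b2) = box1.unsqueeze(1).chunk(2, 2), box2.unsqueeze(0).chunk(2, 2)
    inter = (torch.min(a2, b2) - torch.max(a1, b1)).clamp(0).prod(2)
    return inter / ((a2 - a1).prod(2) + (b2 - b1).prod(2) - inter + eps)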