Skip to content

Commit

Permalink
Browse files (browse the repository at this point in the history)
* Refactor code for speed and clarity

* Update README.zh-CN.md

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>

---------

Signed-off-by: Glenn Jocher <glenn.jocher@ultralytics.com>
Co-authored-by: UltralyticsAssistant <web@ultralytics.com>
  • Loading branch information
glenn-jocher and UltralyticsAssistant authored Aug 26, 2024
1 parent b40781b commit c5ffbbf
Show file tree
Hide file tree
Showing 20 changed files with 164 additions and 73 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/merge-main-into-prs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -34,11 +34,11 @@ jobs:
g = Github(os.getenv('GITHUB_TOKEN'))
repo = g.get_repo(os.getenv('GITHUB_REPOSITORY'))
# Fetch the default branch name
default_branch_name = repo.default_branch
default_branch = repo.get_branch(default_branch_name)
for pr in repo.get_pulls(state='open', sort='created'):
try:
# Get full names for repositories and branches
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
<a href="https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml"><img src="https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg" alt="YOLOv5 CI"></a>
<a href="https://zenodo.org/badge/latestdoi/264818686"><img src="https://zenodo.org/badge/264818686.svg" alt="YOLOv5 Citation"></a>
<a href="https://hub.docker.com/r/ultralytics/yolov5"><img src="https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker" alt="Docker Pulls"></a>
<a href="https://ultralytics.com/discord"><img alt="Discord" src="https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue"></a> <a href="https://community.ultralytics.com"><img alt="Ultralytics Forums" src="https://img.shields.io/discourse/users?server=https%3A%2F%2Fcommunity.ultralytics.com&logo=discourse&label=Forums&color=blue"></a>
<a href="https://ultralytics.com/discord"><img alt="Discord" src="https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue"></a> <a href="https://community.ultralytics.com"><img alt="Ultralytics Forums" src="https://img.shields.io/discourse/users?server=https%3A%2F%2Fcommunity.ultralytics.com&logo=discourse&label=Forums&color=blue"></a> <a href="https://reddit.com/r/ultralytics"><img alt="Ultralytics Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/ultralytics?style=flat&logo=reddit&logoColor=white&label=Reddit&color=blue"></a>
<br>
<a href="https://bit.ly/yolov5-paperspace-notebook"><img src="https://assets.paperspace.io/img/gradient-badge.svg" alt="Run on Gradient"></a>
<a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
Expand Down
1 change: 1 addition & 0 deletions README.zh-CN.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
<a href="https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml"><img src="https://github.com/ultralytics/yolov5/actions/workflows/ci-testing.yml/badge.svg" alt="YOLOv5 CI"></a>
<a href="https://zenodo.org/badge/latestdoi/264818686"><img src="https://zenodo.org/badge/264818686.svg" alt="YOLOv5 Citation"></a>
<a href="https://hub.docker.com/r/ultralytics/yolov5"><img src="https://img.shields.io/docker/pulls/ultralytics/yolov5?logo=docker" alt="Docker Pulls"></a>
<a href="https://ultralytics.com/discord"><img alt="Discord" src="https://img.shields.io/discord/1089800235347353640?logo=discord&logoColor=white&label=Discord&color=blue"></a> <a href="https://community.ultralytics.com"><img alt="Ultralytics Forums" src="https://img.shields.io/discourse/users?server=https%3A%2F%2Fcommunity.ultralytics.com&logo=discourse&label=Forums&color=blue"></a> <a href="https://reddit.com/r/ultralytics"><img alt="Ultralytics Reddit" src="https://img.shields.io/reddit/subreddit-subscribers/ultralytics?style=flat&logo=reddit&logoColor=white&label=Reddit&color=blue"></a>
<br>
<a href="https://bit.ly/yolov5-paperspace-notebook"><img src="https://assets.paperspace.io/img/gradient-badge.svg" alt="Run on Gradient"></a>
<a href="https://colab.research.google.com/github/ultralytics/yolov5/blob/master/tutorial.ipynb"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"></a>
Expand Down
2 changes: 2 additions & 0 deletions export.py
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@


class iOSModel(torch.nn.Module):
"""An iOS-compatible wrapper for YOLOv5 models that normalizes input images based on their dimensions."""

def __init__(self, model, im):
"""
Initializes an iOS compatible model with normalization based on image dimensions.
Expand Down
78 changes: 52 additions & 26 deletions models/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,8 @@ def autopad(k, p=None, d=1):


class Conv(nn.Module):
# Standard convolution with args(ch_in, ch_out, kernel, stride, padding, groups, dilation, activation)
"""Applies a convolution, batch normalization, and activation function to an input tensor in a neural network."""

default_act = nn.SiLU() # default activation

def __init__(self, c1, c2, k=1, s=1, p=None, g=1, d=1, act=True):
Expand All @@ -91,7 +92,8 @@ def forward_fuse(self, x):


class DWConv(Conv):
# Depth-wise convolution
"""Implements a depth-wise convolution layer with optional activation for efficient spatial filtering."""

def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
"""Initializes a depth-wise convolution layer with optional activation; args: input channels (c1), output
channels (c2), kernel size (k), stride (s), dilation (d), and activation flag (act).
Expand All @@ -100,7 +102,8 @@ def __init__(self, c1, c2, k=1, s=1, d=1, act=True):


class DWConvTranspose2d(nn.ConvTranspose2d):
# Depth-wise transpose convolution
"""A depth-wise transpose convolutional layer for upsampling in neural networks, particularly in YOLOv5 models."""

def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):
"""Initializes a depth-wise transpose convolutional layer for YOLOv5; args: input channels (c1), output channels
(c2), kernel size (k), stride (s), input padding (p1), output padding (p2).
Expand All @@ -109,7 +112,8 @@ def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):


class TransformerLayer(nn.Module):
# Transformer layer https://arxiv.org/abs/2010.11929 (LayerNorm layers removed for better performance)
"""Transformer layer with multihead attention and linear layers, optimized by removing LayerNorm."""

def __init__(self, c, num_heads):
"""
Initializes a transformer layer, sans LayerNorm for performance, with multihead attention and linear layers.
Expand All @@ -132,7 +136,8 @@ def forward(self, x):


class TransformerBlock(nn.Module):
# Vision Transformer https://arxiv.org/abs/2010.11929
"""A Transformer block for vision tasks with convolution, position embeddings, and Transformer layers."""

def __init__(self, c1, c2, num_heads, num_layers):
"""Initializes a Transformer block for vision tasks, adapting dimensions if necessary and stacking specified
layers.
Expand All @@ -157,7 +162,8 @@ def forward(self, x):


class Bottleneck(nn.Module):
# Standard bottleneck
"""A bottleneck layer with optional shortcut and group convolution for efficient feature extraction."""

def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
"""Initializes a standard bottleneck layer with optional shortcut and group convolution, supporting channel
expansion.
Expand All @@ -176,7 +182,8 @@ def forward(self, x):


class BottleneckCSP(nn.Module):
# CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
"""CSP bottleneck layer for feature extraction with cross-stage partial connections and optional shortcuts."""

def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes CSP bottleneck with optional shortcuts; args: ch_in, ch_out, number of repeats, shortcut bool,
groups, expansion.
Expand All @@ -201,7 +208,8 @@ def forward(self, x):


class CrossConv(nn.Module):
# Cross Convolution Downsample
"""Implements a cross convolution layer with downsampling, expansion, and optional shortcut."""

def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
"""
Initializes CrossConv with downsampling, expanding, and optionally shortcutting; `c1` input, `c2` output
Expand All @@ -221,7 +229,8 @@ def forward(self, x):


class C3(nn.Module):
# CSP Bottleneck with 3 convolutions
"""Implements a CSP Bottleneck module with three convolutions for enhanced feature extraction in neural networks."""

def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes C3 module with options for channel count, bottleneck repetition, shortcut usage, group
convolutions, and expansion.
Expand All @@ -239,7 +248,8 @@ def forward(self, x):


class C3x(C3):
# C3 module with cross-convolutions
"""Extends the C3 module with cross-convolutions for enhanced feature extraction in neural networks."""

def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes C3x module with cross-convolutions, extending C3 with customizable channel dimensions, groups,
and expansion.
Expand All @@ -250,7 +260,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):


class C3TR(C3):
# C3 module with TransformerBlock()
"""C3 module with TransformerBlock for enhanced feature extraction in object detection models."""

def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes C3 module with TransformerBlock for enhanced feature extraction, accepts channel sizes, shortcut
config, group, and expansion.
Expand All @@ -261,7 +272,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):


class C3SPP(C3):
# C3 module with SPP()
"""Extends the C3 module with an SPP layer for enhanced spatial feature extraction and customizable channels."""

def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):
"""Initializes a C3 module with SPP layer for advanced spatial feature extraction, given channel sizes, kernel
sizes, shortcut, group, and expansion ratio.
Expand All @@ -272,7 +284,8 @@ def __init__(self, c1, c2, k=(5, 9, 13), n=1, shortcut=True, g=1, e=0.5):


class C3Ghost(C3):
# C3 module with GhostBottleneck()
"""Implements a C3 module with Ghost Bottlenecks for efficient feature extraction in YOLOv5."""

def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
"""Initializes YOLOv5's C3 module with Ghost Bottlenecks for efficient feature extraction."""
super().__init__(c1, c2, n, shortcut, g, e)
Expand All @@ -281,7 +294,8 @@ def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):


class SPP(nn.Module):
# Spatial Pyramid Pooling (SPP) layer https://arxiv.org/abs/1406.4729
"""Implements Spatial Pyramid Pooling (SPP) for feature extraction, ref: https://arxiv.org/abs/1406.4729."""

def __init__(self, c1, c2, k=(5, 9, 13)):
"""Initializes SPP layer with Spatial Pyramid Pooling, ref: https://arxiv.org/abs/1406.4729, args: c1 (input channels), c2 (output channels), k (kernel sizes)."""
super().__init__()
Expand All @@ -301,7 +315,8 @@ def forward(self, x):


class SPPF(nn.Module):
# Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
"""Implements a fast Spatial Pyramid Pooling (SPPF) layer for efficient feature extraction in YOLOv5 models."""

def __init__(self, c1, c2, k=5):
"""
Initializes YOLOv5 SPPF layer with given channels and kernel size for YOLOv5 model, combining convolution and
Expand All @@ -326,7 +341,8 @@ def forward(self, x):


class Focus(nn.Module):
# Focus wh information into c-space
"""Focuses spatial information into channel space using slicing and convolution for efficient feature extraction."""

def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
"""Initializes Focus module to concentrate width-height info into channel space with configurable convolution
parameters.
Expand All @@ -342,7 +358,8 @@ def forward(self, x):


class GhostConv(nn.Module):
# Ghost Convolution https://github.com/huawei-noah/ghostnet
"""Implements Ghost Convolution for efficient feature extraction, see https://github.com/huawei-noah/ghostnet."""

def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
"""Initializes GhostConv with in/out channels, kernel size, stride, groups, and activation; halves out channels
for efficiency.
Expand All @@ -359,7 +376,8 @@ def forward(self, x):


class GhostBottleneck(nn.Module):
# Ghost Bottleneck https://github.com/huawei-noah/ghostnet
"""Efficient bottleneck layer using Ghost Convolutions, see https://github.com/huawei-noah/ghostnet."""

def __init__(self, c1, c2, k=3, s=1):
"""Initializes GhostBottleneck with ch_in `c1`, ch_out `c2`, kernel size `k`, stride `s`; see https://github.com/huawei-noah/ghostnet."""
super().__init__()
Expand All @@ -379,7 +397,8 @@ def forward(self, x):


class Contract(nn.Module):
# Contract width-height into channels, i.e. x(1,64,80,80) to x(1,256,40,40)
"""Contracts spatial dimensions into channel dimensions for efficient processing in neural networks."""

def __init__(self, gain=2):
"""Initializes a layer to contract spatial dimensions (width-height) into channels, e.g., input shape
(1,64,80,80) to (1,256,40,40).
Expand All @@ -399,7 +418,8 @@ def forward(self, x):


class Expand(nn.Module):
# Expand channels into width-height, i.e. x(1,64,80,80) to x(1,16,160,160)
"""Expands spatial dimensions by redistributing channels, e.g., from (1,64,80,80) to (1,16,160,160)."""

def __init__(self, gain=2):
"""
Initializes the Expand module to increase spatial dimensions by redistributing channels, with an optional gain
Expand All @@ -422,7 +442,8 @@ def forward(self, x):


class Concat(nn.Module):
# Concatenate a list of tensors along dimension
"""Concatenates tensors along a specified dimension for efficient tensor manipulation in neural networks."""

def __init__(self, dimension=1):
"""Initializes a Concat module to concatenate tensors along a specified dimension."""
super().__init__()
Expand All @@ -436,7 +457,8 @@ def forward(self, x):


class DetectMultiBackend(nn.Module):
# YOLOv5 MultiBackend class for python inference on various backends
"""YOLOv5 MultiBackend class for inference on various backends including PyTorch, ONNX, TensorRT, and more."""

def __init__(self, weights="yolov5s.pt", device=torch.device("cpu"), dnn=False, data=None, fp16=False, fuse=True):
"""Initializes DetectMultiBackend with support for various inference backends, including PyTorch and ONNX."""
# PyTorch: weights = *.pt
Expand Down Expand Up @@ -778,7 +800,8 @@ def _load_metadata(f=Path("path/to/meta.yaml")):


class AutoShape(nn.Module):
# YOLOv5 input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
"""AutoShape class for robust YOLOv5 inference with preprocessing, NMS, and support for various input formats."""

conf = 0.25 # NMS confidence threshold
iou = 0.45 # NMS IoU threshold
agnostic = False # NMS class-agnostic
Expand Down Expand Up @@ -889,7 +912,8 @@ def forward(self, ims, size=640, augment=False, profile=False):


class Detections:
# YOLOv5 detections class for inference results
"""Manages YOLOv5 detection results with methods for visualization, saving, cropping, and exporting detections."""

def __init__(self, ims, pred, files, times=(0, 0, 0), names=None, shape=None):
"""Initializes the YOLOv5 Detections class with image info, predictions, filenames, timing and normalization."""
super().__init__()
Expand Down Expand Up @@ -1047,7 +1071,8 @@ def __repr__(self):


class Proto(nn.Module):
# YOLOv5 mask Proto module for segmentation models
"""YOLOv5 mask Proto module for segmentation models, performing convolutions and upsampling on input tensors."""

def __init__(self, c1, c_=256, c2=32):
"""Initializes YOLOv5 Proto module for segmentation with input, proto, and mask channels configuration."""
super().__init__()
Expand All @@ -1062,7 +1087,8 @@ def forward(self, x):


class Classify(nn.Module):
# YOLOv5 classification head, i.e. x(b,c1,20,20) to x(b,c2)
"""YOLOv5 classification head with convolution, pooling, and dropout layers for channel transformation."""

def __init__(
self, c1, c2, k=1, s=1, p=None, g=1, dropout_p=0.0
): # ch_in, ch_out, kernel, stride, padding, groups, dropout probability
Expand Down
Loading

0 comments on commit c5ffbbf

Please sign in to comment.