ultralytics · glenn-jocher · Feb 26, 2024 · Feb 25, 2024 · Feb 25, 2024 · Feb 25, 2024
diff --git a/models/common.py b/models/common.py
@@ -57,8 +57,12 @@
 from utils.torch_utils import copy_attr, smart_inference_mode
 
 
-def autopad(k, p=None, d=1):  # kernel, padding, dilation
-    # Pad to 'same' shape outputs
+def autopad(k, p=None, d=1):
+    """
+    Computes padding for 'same' output shape, adjusting kernel size for optional dilation; returns padding size.
+
+    `k`: kernel, `p`: padding, `d`: dilation.
+    """
     if d > 1:
         k = d * (k - 1) + 1 if isinstance(k, int) else [d * (x - 1) + 1 for x in k]  # actual kernel-size
     if p is None:
@@ -88,13 +92,19 @@ def forward_fuse(self, x):
 
 class DWConv(Conv):
     # Depth-wise convolution
-    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):  # ch_in, ch_out, kernel, stride, dilation, activation
+    def __init__(self, c1, c2, k=1, s=1, d=1, act=True):
+        """Initializes a depth-wise convolution layer with optional activation; args: input channels (c1), output
+        channels (c2), kernel size (k), stride (s), dilation (d), and activation flag (act).
+        """
         super().__init__(c1, c2, k, s, g=math.gcd(c1, c2), d=d, act=act)
 
 
 class DWConvTranspose2d(nn.ConvTranspose2d):
     # Depth-wise transpose convolution
-    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):  # ch_in, ch_out, kernel, stride, padding, padding_out
+    def __init__(self, c1, c2, k=1, s=1, p1=0, p2=0):
+        """Initializes a depth-wise transpose convolutional layer for YOLOv5; args: input channels (c1), output channels
+        (c2), kernel size (k), stride (s), input padding (p1), output padding (p2).
+        """
         super().__init__(c1, c2, k, s, p1, p2, groups=math.gcd(c1, c2))
 
 
@@ -148,7 +158,10 @@ def forward(self, x):
 
 class Bottleneck(nn.Module):
     # Standard bottleneck
-    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):
+        """Initializes a standard bottleneck layer with optional shortcut and group convolution, supporting channel
+        expansion.
+        """
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
@@ -164,7 +177,10 @@ def forward(self, x):
 
 class BottleneckCSP(nn.Module):
     # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        """Initializes CSP bottleneck with optional shortcuts; args: ch_in, ch_out, number of repeats, shortcut bool,
+        groups, expansion.
+        """
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
@@ -206,7 +222,10 @@ def forward(self, x):
 
 class C3(nn.Module):
     # CSP Bottleneck with 3 convolutions
-    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):
+        """Initializes C3 module with options for channel count, bottleneck repetition, shortcut usage, group
+        convolutions, and expansion.
+        """
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
@@ -283,7 +302,13 @@ def forward(self, x):
 
 class SPPF(nn.Module):
     # Spatial Pyramid Pooling - Fast (SPPF) layer for YOLOv5 by Glenn Jocher
-    def __init__(self, c1, c2, k=5):  # equivalent to SPP(k=(5, 9, 13))
+    def __init__(self, c1, c2, k=5):
+        """
+        Initializes YOLOv5 SPPF layer with given channels and kernel size, combining convolution and
+        max pooling.
+
+        Equivalent to SPP(k=(5, 9, 13)).
+        """
         super().__init__()
         c_ = c1 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, 1, 1)
@@ -302,19 +327,26 @@ def forward(self, x):
 
 class Focus(nn.Module):
     # Focus wh information into c-space
-    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):
+        """Initializes Focus module to concentrate width-height info into channel space with configurable convolution
+        parameters.
+        """
         super().__init__()
         self.conv = Conv(c1 * 4, c2, k, s, p, g, act=act)
         # self.contract = Contract(gain=2)
 
-    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
+    def forward(self, x):
+        """Processes input through Focus mechanism, reshaping (b,c,w,h) to (b,4c,w/2,h/2) then applies convolution."""
         return self.conv(torch.cat((x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]), 1))
         # return self.conv(self.contract(x))
 
 
 class GhostConv(nn.Module):
     # Ghost Convolution https://github.com/huawei-noah/ghostnet
-    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
+    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):
+        """Initializes GhostConv with in/out channels, kernel size, stride, groups, and activation; halves out channels
+        for efficiency.
+        """
         super().__init__()
         c_ = c2 // 2  # hidden channels
         self.cv1 = Conv(c1, c_, k, s, None, g, act=act)
@@ -328,7 +360,8 @@ def forward(self, x):
 
 class GhostBottleneck(nn.Module):
     # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
-    def __init__(self, c1, c2, k=3, s=1):  # ch_in, ch_out, kernel, stride
+    def __init__(self, c1, c2, k=3, s=1):
+        """Initializes GhostBottleneck with ch_in `c1`, ch_out `c2`, kernel size `k`, stride `s`; see https://github.com/huawei-noah/ghostnet."""
         super().__init__()
         c_ = c2 // 2
         self.conv = nn.Sequential(
@@ -982,10 +1015,14 @@ def print(self):
         """Logs the string representation of the current object's state via the LOGGER."""
         LOGGER.info(self.__str__())
 
-    def __len__(self):  # override len(results)
+    def __len__(self):
+        """Returns the number of results stored, overrides the default len(results)."""
         return self.n
 
-    def __str__(self):  # override print(results)
+    def __str__(self):
+        """Returns a string representation of the model's results, suitable for printing, overrides default
+        print(results).
+        """
         return self._run(pprint=True)  # print results
 
     def __repr__(self):
@@ -995,7 +1032,8 @@ def __repr__(self):
 
 class Proto(nn.Module):
     # YOLOv5 mask Proto module for segmentation models
-    def __init__(self, c1, c_=256, c2=32):  # ch_in, number of protos, number of masks
+    def __init__(self, c1, c_=256, c2=32):
+        """Initializes YOLOv5 Proto module for segmentation with input, proto, and mask channels configuration."""
         super().__init__()
         self.cv1 = Conv(c1, c_, k=3)
         self.upsample = nn.Upsample(scale_factor=2, mode="nearest")

diff --git a/models/experimental.py b/models/experimental.py
@@ -10,8 +10,12 @@
 
 
 class Sum(nn.Module):
-    # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
-    def __init__(self, n, weight=False):  # n: number of inputs
+    """Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070."""
+
+    def __init__(self, n, weight=False):
+        """Initializes a module to sum outputs of layers with number of inputs `n` and optional weighting, supporting 2+
+        inputs.
+        """
         super().__init__()
         self.weight = weight  # apply weights boolean
         self.iter = range(n - 1)  # iter object
@@ -32,8 +36,12 @@ def forward(self, x):
 
 
 class MixConv2d(nn.Module):
-    # Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595
-    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):  # ch_in, ch_out, kernel, stride, ch_strategy
+    """Mixed Depth-wise Conv https://arxiv.org/abs/1907.09595."""
+
+    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
+        """Initializes MixConv2d with mixed depth-wise convolutional layers, taking input and output channels (c1, c2),
+        kernel sizes (k), stride (s), and channel distribution strategy (equal_ch).
+        """
         super().__init__()
         n = len(k)  # number of convolutions
         if equal_ch:  # equal c_ per group

diff --git a/models/tf.py b/models/tf.py
@@ -190,15 +190,25 @@ def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True, w=None):
         super().__init__()
         self.conv = TFConv(c1 * 4, c2, k, s, p, g, act, w.conv)
 
-    def call(self, inputs):  # x(b,w,h,c) -> y(b,w/2,h/2,4c)
-        # inputs = inputs / 255  # normalize 0-255 to 0-1
+    def call(self, inputs):
+        """
+        Rearranges input via space-to-depth slicing (halving width/height, quadrupling channels), then convolves.
+
+        Example x(b,w,h,c) -> y(b,w/2,h/2,4c).
+        """
         inputs = [inputs[:, ::2, ::2, :], inputs[:, 1::2, ::2, :], inputs[:, ::2, 1::2, :], inputs[:, 1::2, 1::2, :]]
         return self.conv(tf.concat(inputs, 3))
 
 
 class TFBottleneck(keras.layers.Layer):
     # Standard bottleneck
-    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):  # ch_in, ch_out, shortcut, groups, expansion
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5, w=None):
+        """
+        Initializes a standard bottleneck layer for TensorFlow models, expanding and contracting channels with optional
+        shortcut.
+
+        Arguments are ch_in, ch_out, shortcut, groups, expansion.
+        """
         super().__init__()
         c_ = int(c2 * e)  # hidden channels
         self.cv1 = TFConv(c1, c_, 1, 1, w=w.cv1)
@@ -364,7 +374,10 @@ def call(self, inputs):
 
 class TFDetect(keras.layers.Layer):
     # TF YOLOv5 Detect layer
-    def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):  # detection layer
+    def __init__(self, nc=80, anchors=(), ch=(), imgsz=(640, 640), w=None):
+        """Initializes YOLOv5 detection layer for TensorFlow with configurable classes, anchors, channels, and image
+        size.
+        """
         super().__init__()
         self.stride = tf.convert_to_tensor(w.stride.numpy(), dtype=tf.float32)
         self.nc = nc  # number of classes
@@ -454,7 +467,13 @@ def call(self, inputs):
 
 class TFUpsample(keras.layers.Layer):
     # TF version of torch.nn.Upsample()
-    def __init__(self, size, scale_factor, mode, w=None):  # warning: all arguments needed including 'w'
+    def __init__(self, size, scale_factor, mode, w=None):
+        """
+        Initializes a TensorFlow upsampling layer with specified size, scale_factor, and mode, ensuring scale_factor is
+        even.
+
+        Warning: all arguments needed including 'w'.
+        """
         super().__init__()
         assert scale_factor % 2 == 0, "scale_factor must be multiple of 2"
         self.upsample = lambda x: tf.image.resize(x, (x.shape[1] * scale_factor, x.shape[2] * scale_factor), mode)
@@ -481,7 +500,8 @@ def call(self, inputs):
         return tf.concat(inputs, self.d)
 
 
-def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
+def parse_model(d, ch, model, imgsz):
+    """Parses a model definition dict `d` to create YOLOv5 model layers, including dynamic channel adjustments."""
     LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
     anchors, nc, gd, gw, ch_mul = (
         d["anchors"],
@@ -562,7 +582,10 @@ def parse_model(d, ch, model, imgsz):  # model_dict, input_channels(3)
 
 class TFModel:
     # TF YOLOv5 model
-    def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)):  # model, channels, classes
+    def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, model=None, imgsz=(640, 640)):
+        """Initializes TF YOLOv5 model with specified configuration, channels, classes, model instance, and input
+        size.
+        """
         super().__init__()
         if isinstance(cfg, dict):
             self.yaml = cfg  # model dict
@@ -640,7 +663,10 @@ def call(self, input, topk_all, iou_thres, conf_thres):
         )
 
     @staticmethod
-    def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25):  # agnostic NMS
+    def _nms(x, topk_all=100, iou_thres=0.45, conf_thres=0.25):
+        """Performs agnostic non-maximum suppression (NMS) on detected objects, filtering based on IoU and confidence
+        thresholds.
+        """
         boxes, classes, scores = x
         class_inds = tf.cast(tf.argmax(classes, axis=-1), tf.float32)
         scores_inp = tf.reduce_max(scores, -1)

diff --git a/models/yolo.py b/models/yolo.py
@@ -75,7 +75,8 @@ class Detect(nn.Module):
     dynamic = False  # force grid reconstruction
     export = False  # export mode
 
-    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):  # detection layer
+    def __init__(self, nc=80, anchors=(), ch=(), inplace=True):
+        """Initializes YOLOv5 detection layer with specified classes, anchors, channels, and inplace operations."""
         super().__init__()
         self.nc = nc  # number of classes
         self.no = nc + 5  # number of outputs per anchor
@@ -183,7 +184,8 @@ def _profile_one_layer(self, m, x, dt):
         if c:
             LOGGER.info(f"{sum(dt):10.2f} {'-':>10s} {'-':>10s}  Total")
 
-    def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
+    def fuse(self):
+        """Fuses Conv2d() and BatchNorm2d() layers in the model to improve inference speed."""
         LOGGER.info("Fusing layers... ")
         for m in self.model.modules():
             if isinstance(m, (Conv, DWConv)) and hasattr(m, "bn"):
@@ -193,7 +195,8 @@ def fuse(self):  # fuse model Conv2d() + BatchNorm2d() layers
         self.info()
         return self
 
-    def info(self, verbose=False, img_size=640):  # print model information
+    def info(self, verbose=False, img_size=640):
+        """Prints model information given verbosity and image size, e.g., `info(verbose=True, img_size=640)`."""
         model_info(self, verbose, img_size)
 
     def _apply(self, fn):
@@ -212,7 +215,8 @@ def _apply(self, fn):
 
 class DetectionModel(BaseModel):
     # YOLOv5 detection model
-    def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None):  # model, input channels, number of classes
+    def __init__(self, cfg="yolov5s.yaml", ch=3, nc=None, anchors=None):
+        """Initializes YOLOv5 model with configuration file, input channels, number of classes, and custom anchors."""
         super().__init__()
         if isinstance(cfg, dict):
             self.yaml = cfg  # model dict
@@ -303,8 +307,12 @@ def _clip_augmented(self, y):
         y[-1] = y[-1][:, i:]  # small
         return y
 
-    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
-        # https://arxiv.org/abs/1708.02002 section 3.3
+    def _initialize_biases(self, cf=None):
+        """
+        Initializes biases for YOLOv5's Detect() module, optionally using class frequencies (cf).
+
+        For details see https://arxiv.org/abs/1708.02002 section 3.3.
+        """
         # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
         m = self.model[-1]  # Detect() module
         for mi, s in zip(m.m, m.stride):  # from
@@ -328,7 +336,10 @@ def __init__(self, cfg="yolov5s-seg.yaml", ch=3, nc=None, anchors=None):
 
 class ClassificationModel(BaseModel):
     # YOLOv5 classification model
-    def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):  # yaml, model, number of classes, cutoff index
+    def __init__(self, cfg=None, model=None, nc=1000, cutoff=10):
+        """Initializes YOLOv5 classification model from config file `cfg` or an existing `model`, with number of
+        classes `nc` and `cutoff` index.
+        """
         super().__init__()
         self._from_detection_model(model, nc, cutoff) if model is not None else self._from_yaml(cfg)
 
@@ -354,8 +365,8 @@ def _from_yaml(self, cfg):
         self.model = None
 
 
-def parse_model(d, ch):  # model_dict, input_channels(3)
-    # Parse a YOLOv5 model.yaml dictionary
+def parse_model(d, ch):
+    """Parses a YOLOv5 model from a dict `d`, configuring layers based on input channels `ch` and model architecture."""
     LOGGER.info(f"\n{'':>3}{'from':>18}{'n':>3}{'params':>10}  {'module':<40}{'arguments':<30}")
     anchors, nc, gd, gw, act, ch_mul = (
         d["anchors"],

diff --git a/segment/train.py b/segment/train.py
@@ -95,7 +95,12 @@
 GIT_INFO = check_git_info()
 
 
-def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
+def train(hyp, opt, device, callbacks):
+    """
+    Trains the YOLOv5 model on a dataset, managing hyperparameters, model optimization, logging, and validation.
+
+    `hyp` is path/to/hyp.yaml or hyp dictionary.
+    """
     (
         save_dir,
         epochs,

diff --git a/train.py b/train.py
@@ -100,7 +100,13 @@
 GIT_INFO = check_git_info()
 
 
-def train(hyp, opt, device, callbacks):  # hyp is path/to/hyp.yaml or hyp dictionary
+def train(hyp, opt, device, callbacks):
+    """
+    Trains YOLOv5 model with given hyperparameters, options, and device, managing datasets, model architecture, loss
+    computation, and optimizer steps.
+
+    `hyp` argument is path/to/hyp.yaml or hyp dictionary.
+    """
     save_dir, epochs, batch_size, weights, single_cls, evolve, data, cfg, resume, noval, nosave, workers, freeze = (
         Path(opt.save_dir),
         opt.epochs,