diff --git a/test/test_models.py b/test/test_models.py
index 3ec3ea9c..16379af3 100644
--- a/test/test_models.py
+++ b/test/test_models.py
@@ -108,7 +108,8 @@ def _init_test_yolo_head(self):
         in_channels = self._get_in_channels()
         num_anchors = self._get_num_anchors()
         num_classes = self._get_num_classes()
-        box_head = YoloHead(in_channels, num_anchors, num_classes)
+        strides = self._get_strides()
+        box_head = YoloHead(in_channels, num_anchors, strides, num_classes)
         return box_head
 
     def test_yolo_head(self):
diff --git a/yolort/models/box_head.py b/yolort/models/box_head.py
index 5742a991..f4d64886 100644
--- a/yolort/models/box_head.py
+++ b/yolort/models/box_head.py
@@ -1,4 +1,5 @@
 # Modified from ultralytics/yolov5 by Zhiqiang Wang
+import math
 import torch
 from torch import nn, Tensor
 
@@ -10,14 +11,31 @@
 
 
 class YoloHead(nn.Module):
-    def __init__(self, in_channels: List[int], num_anchors: int, num_classes: int):
+    def __init__(self, in_channels: List[int], num_anchors: int, strides: List[int], num_classes: int):
         super().__init__()
         self.num_anchors = num_anchors  # anchors
+        self.num_classes = num_classes
         self.num_outputs = num_classes + 5  # number of outputs per anchor
+        self.strides = strides
 
         self.head = nn.ModuleList(
             nn.Conv2d(ch, self.num_outputs * self.num_anchors, 1) for ch in in_channels)  # output conv
 
+        self._initialize_biases()  # Init weights, biases
+
+    def _initialize_biases(self, cf=None):
+        """
+        Initialize biases into YoloHead, cf is an optional class frequency tensor (None = no class prior)
+        Check section 3.3 in "Focal Loss for Dense Object Detection", https://arxiv.org/abs/1708.02002
+        """
+        for mi, s in zip(self.head, self.strides):
+            b = mi.bias.view(self.num_anchors, -1)  # conv.bias(255) to (3,85)
+            # obj (8 objects per 640 image)
+            b.data[:, 4] += math.log(8 / (640 / s) ** 2)
+            # classes (test `cf is None`: the truth value of a multi-element tensor raises RuntimeError)
+            b.data[:, 5:] += math.log(0.6 / (self.num_classes - 0.99)) if cf is None else torch.log(cf / cf.sum())
+            mi.bias = nn.Parameter(b.view(-1), requires_grad=True)
+
     def get_result_from_head(self, features: Tensor, idx: int) -> Tensor:
         """
         This is equivalent to self.head[idx](features),
@@ -199,7 +217,8 @@ def assign_targets_to_anchors( # Append a = targets_with_gain[:, 6].long() # anchor indices # image, anchor, grid indices - indices.append((bc[0], a, grid_ij[:, 1].clamp_(0, gain[3] - 1), grid_ij[:, 0].clamp_(0, gain[2] - 1))) + indices.append((bc[0], a, grid_ij[:, 1].clamp_(0, gain[3] - 1), + grid_ij[:, 0].clamp_(0, gain[2] - 1))) targets_box.append(torch.cat((grid_xy - grid_ij, grid_wh), 1)) # box anchors_encode.append(anchors_per_layer[a]) # anchors targets_cls.append(bc[1]) # class diff --git a/yolort/models/yolo.py b/yolort/models/yolo.py index 166b86dc..0f00d5cd 100644 --- a/yolort/models/yolo.py +++ b/yolort/models/yolo.py @@ -56,6 +56,7 @@ def __init__( head = YoloHead( backbone.out_channels, anchor_generator.num_anchors, + anchor_generator.strides, num_classes, ) self.head = head