Support different number of input channels to YOLOX backbone #1239

Open · wants to merge 3 commits into base: main

Changes from 2 commits
exps/default/yolox_nano.py (9 changes: 7 additions & 2 deletions)

@@ -14,6 +14,7 @@ def __init__(self):
         super(Exp, self).__init__()
         self.depth = 0.33
         self.width = 0.25
+        self.backbone_in_channels = 3
         self.input_size = (416, 416)
         self.random_size = (10, 20)
         self.mosaic_scale = (0.5, 1.5)
@@ -34,8 +35,12 @@ def init_yolo(M):
             in_channels = [256, 512, 1024]
             # NANO model use depthwise = True, which is main difference.
             backbone = YOLOPAFPN(
-                self.depth, self.width, in_channels=in_channels,
-                act=self.act, depthwise=True,
+                self.depth,
+                self.width,
+                backbone_in_channels=self.backbone_in_channels,
+                in_channels=in_channels,
+                act=self.act,
+                depthwise=True,
             )
             head = YOLOXHead(
                 self.num_classes, self.width, in_channels=in_channels,
yolox/exp/yolox_base.py (14 changes: 12 additions & 2 deletions)

@@ -17,6 +17,8 @@ def __init__(self):
         super().__init__()

         # ---------------- model config ---------------- #
+        # number of input channels, e.g. 3 for RGB input
+        self.backbone_in_channels = 3
         # detect classes number of model
         self.num_classes = 80
         # factor of model depth
@@ -118,8 +120,16 @@ def init_yolo(M):

         if getattr(self, "model", None) is None:
             in_channels = [256, 512, 1024]
-            backbone = YOLOPAFPN(self.depth, self.width, in_channels=in_channels, act=self.act)
-            head = YOLOXHead(self.num_classes, self.width, in_channels=in_channels, act=self.act)
+            backbone = YOLOPAFPN(
+                self.depth,
+                self.width,
+                backbone_in_channels=self.backbone_in_channels,
+                in_channels=in_channels,
+                act=self.act,
+            )
+            head = YOLOXHead(
+                self.num_classes, self.width, in_channels=in_channels, act=self.act
+            )
             self.model = YOLOX(backbone, head)

         self.model.apply(init_yolo)
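
For the experiment-driven workflow, a minimal sketch of how a custom Exp could opt into a different channel count via this new field (the 4-channel value and the class name are illustrative assumptions, not part of this diff):

from yolox.exp import Exp as BaseExp


class FourChannelExp(BaseExp):
    def __init__(self):
        super().__init__()
        # e.g. RGB plus a near-infrared band
        self.backbone_in_channels = 4


exp = FourChannelExp()
model = exp.get_model()  # backbone stem is now built for 4-channel input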
yolox/models/build.py (51 changes: 35 additions & 16 deletions)

@@ -29,7 +29,11 @@


 def create_yolox_model(
-    name: str, pretrained: bool = True, num_classes: int = 80, device=None
+    name: str,
+    pretrained: bool = True,
+    backbone_in_channels: int = 3,
+    num_classes: int = 80,
+    device=None,
 ) -> nn.Module:
     """creates and loads a YOLOX model

@@ -50,9 +54,10 @@ def create_yolox_model(

     assert name in _CKPT_FULL_PATH, f"user should use one of value in {_CKPT_FULL_PATH.keys()}"
     exp: Exp = get_exp(exp_name=name)
+    exp.backbone_in_channels = backbone_in_channels
     exp.num_classes = num_classes
     yolox_model = exp.get_model()
-    if pretrained and num_classes == 80:
+    if pretrained and backbone_in_channels == 3 and num_classes == 80:
Review comment on this line:

I'm not sure if the architecture fully supports it, but in the past, when using N>3 for FRCNN or MRCNN w/ resnet backbone, I had better luck adapting weights to the extra channels. Certainly beats training from scratch.

Rather than ignoring the weights in the case of N_Channels != 3, is it possible to randomize the extra weights or duplicate weights from a different channel?

At the very least, might be nice to log a warning that the weights are being ignored, despite the "pretrained" input being true.

weiji14 (Author) commented on Apr 14, 2022:

> I'm not sure if the architecture fully supports it, but in the past, when using N>3 for FRCNN or MRCNN w/ resnet backbone, I had better luck adapting weights to the extra channels. Certainly beats training from scratch.
>
> Rather than ignoring the weights in the case of N_Channels != 3, is it possible to randomize the extra weights or duplicate weights from a different channel?

Hmm, I'm not sure how to randomize weights for extra channels, do you have some example code to do that? Maybe this can be done in a follow-up Pull Request so as not to overcomplicate things.

> At the very least, might be nice to log a warning that the weights are being ignored, despite the "pretrained" input being true.

Good idea. Or maybe it should just be an error? Edit: decided to just let it raise an error, done in commit 4e42e61.

dcyoung commented on Apr 14, 2022:

Hmmm. For resnet50 channel additions I've mainly used TensorFlow, where I did something like:

tensor = params["conv0/W"]
assert tensor.shape == (7, 7, 3, 64)
# Create a replacement with 4 channels, using the existing first 3 and a copy of the 1st
replacement = np.zeros((7, 7, 4, 64), tensor.dtype)
replacement[:, :, :3, :] = tensor
replacement[:, :, 3, :] = tensor[:, :, 0, :]
params["conv0/W"] = replacement

I'm not sure how well that translates to the architecture used here.

For PyTorch, I believe you can simply modify the state dict before loading. You could do this to avoid loading any tensors with mismatched sizes, i.e. attempt to use all weights which CAN be used. For example, a model trained on a different number of classes could still be used to populate the weights of the backbone, omitting just the weights from the model head. Here is an example from Hugging Face: https://github.com/huggingface/transformers/blob/v4.18.0/src/transformers/modeling_utils.py#L1989

In the case of N channels != 3, you might need to manipulate the weights. I've had success manipulating weights directly like so:

# Load from a PyTorch checkpoint
state_dict = torch.load(archive_file, map_location="cpu")

# Manually manipulate the weights relevant to the 4-channel model
wip = state_dict["param_name"]
# manipulate
wip = ...
# update
state_dict["param_name"] = wip
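
For what it's worth, a minimal PyTorch sketch of that "use every weight whose shape still fits" idea, written against the names used in build.py (illustrative only, not code from this PR):

# yolox_model: freshly built model (possibly N != 3 channels or num_classes != 80)
# ckpt: checkpoint dict loaded via load_state_dict_from_url in build.py
model_state = yolox_model.state_dict()
ckpt_state = ckpt["model"] if "model" in ckpt else ckpt
# Keep only tensors whose name and shape match the new model; the stem conv
# and the head weights drop out automatically when their shapes differ.
compatible = {
    k: v for k, v in ckpt_state.items()
    if k in model_state and v.shape == model_state[k].shape
}
model_state.update(compatible)
yolox_model.load_state_dict(model_state)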

         weights_url = _CKPT_FULL_PATH[name]
         ckpt = load_state_dict_from_url(weights_url, map_location="cpu")
         if "model" in ckpt:
@@ -63,29 +68,43 @@
     return yolox_model


-def yolox_nano(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-nano", pretrained, num_classes, device)
+def yolox_nano(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    return create_yolox_model(
+        "yolox-nano", pretrained, backbone_in_channels, num_classes, device
+    )


-def yolox_tiny(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-tiny", pretrained, num_classes, device)
+def yolox_tiny(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    return create_yolox_model(
+        "yolox-tiny", pretrained, backbone_in_channels, num_classes, device
+    )


-def yolox_s(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-s", pretrained, num_classes, device)
+def yolox_s(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    return create_yolox_model(
+        "yolox-s", pretrained, backbone_in_channels, num_classes, device
+    )


-def yolox_m(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-m", pretrained, num_classes, device)
+def yolox_m(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    return create_yolox_model(
+        "yolox-m", pretrained, backbone_in_channels, num_classes, device
+    )


-def yolox_l(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-l", pretrained, num_classes, device)
+def yolox_l(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    return create_yolox_model(
+        "yolox-l", pretrained, backbone_in_channels, num_classes, device
+    )


-def yolox_x(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-x", pretrained, num_classes, device)
+def yolox_x(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    return create_yolox_model(
+        "yolox-x", pretrained, backbone_in_channels, num_classes, device
+    )


-def yolov3(pretrained=True, num_classes=80, device=None):
-    return create_yolox_model("yolox-tiny", pretrained, num_classes, device)
+def yolov3(pretrained=True, backbone_in_channels=3, num_classes=80, device=None):
+    return create_yolox_model(
+        "yolox-tiny", pretrained, backbone_in_channels, num_classes, device
+    )
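
A quick usage sketch of the extended builder API (the 4-channel input and 640x640 size are illustrative assumptions; a non-RGB model cannot reuse the pretrained COCO weights, per the check above):

import torch

from yolox.models.build import yolox_s

model = yolox_s(pretrained=False, backbone_in_channels=4, num_classes=80, device="cpu")
model.eval()

dummy = torch.randn(1, 4, 640, 640)  # one 4-channel image
with torch.no_grad():
    outputs = model(dummy)  # head outputs for the 4-channel batch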
yolox/models/darknet.py (3 changes: 2 additions & 1 deletion)

@@ -99,6 +99,7 @@ def __init__(
         self,
         dep_mul,
         wid_mul,
+        in_channels=3,
         out_features=("dark3", "dark4", "dark5"),
         depthwise=False,
         act="silu",
@@ -112,7 +113,7 @@
         base_depth = max(round(dep_mul * 3), 1)  # 3

         # stem
-        self.stem = Focus(3, base_channels, ksize=3, act=act)
+        self.stem = Focus(in_channels, base_channels, ksize=3, act=act)

         # dark2
         self.dark2 = nn.Sequential(
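
And a small sanity-check sketch for the backbone change on its own (the expected shapes are my assumption for a 640x640 input at width 1.0):

import torch

from yolox.models.darknet import CSPDarknet

backbone = CSPDarknet(dep_mul=1.0, wid_mul=1.0, in_channels=4)
backbone.eval()

with torch.no_grad():
    feats = backbone(torch.randn(1, 4, 640, 640))
# feats is a dict keyed by "dark3", "dark4", "dark5", roughly
# (1, 256, 80, 80), (1, 512, 40, 40), (1, 1024, 20, 20) here.
print({k: tuple(v.shape) for k, v in feats.items()})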
yolox/models/yolo_fpn.py (3 changes: 2 additions & 1 deletion)

@@ -17,11 +17,12 @@ class YOLOFPN(nn.Module):
     def __init__(
         self,
         depth=53,
+        backbone_in_channels=3,
         in_features=["dark3", "dark4", "dark5"],
     ):
         super().__init__()

-        self.backbone = Darknet(depth)
+        self.backbone = Darknet(depth, in_channels=backbone_in_channels)
         self.in_features = in_features

         # out 1
yolox/models/yolo_pafpn.py (5 changes: 4 additions & 1 deletion)

@@ -18,13 +18,16 @@ def __init__(
         self,
         depth=1.0,
         width=1.0,
+        backbone_in_channels=3,
         in_features=("dark3", "dark4", "dark5"),
         in_channels=[256, 512, 1024],
         depthwise=False,
         act="silu",
     ):
         super().__init__()
-        self.backbone = CSPDarknet(depth, width, depthwise=depthwise, act=act)
+        self.backbone = CSPDarknet(
+            depth, width, in_channels=backbone_in_channels, depthwise=depthwise, act=act
+        )
         self.in_features = in_features
         self.in_channels = in_channels
         Conv = DWConv if depthwise else BaseConv