Commit

Feature/sg 1033 fix yolox anchors (#1369)
* Update readme

* Fix small bug in __repr__ implementation of KeypointsImageToTensor

* Test

* Test

* Test

* Test

* Test

* Test

* Make graphsurgeon an optional dependency

* Make graphsurgeon an optional dependency

* Properly handle imports of optional packages

* Added empty __init__.py files

* Do imports of gs inside the export call

* Do imports of gs inside the export call

* Fix DEKR's missing HasPredict interface

* Update notebook & example doc to reflect changes in imports & function names

* Update readme

* Put back images

* Install onnx_graphsurgeon in CI

* Install onnx_graphsurgeon in CI

* Working prototype of the YoloX Anchors fix that can also load model weights

* Added more tests for detection predict() and yolox checkpoint loading

* Fix version of ONNX-GS installed in CI and installed on-demand

* Added docs

* Added docs

* Added docs

* Remove leftover

* Set ignore_errors=True in the trainer test and explain why

* Fix bug in maybe_remove_module_prefix
BloodAxe authored and Louis-Dupont committed Aug 15, 2023
1 parent 0f96f1e commit b308cf2
Showing 7 changed files with 1,336 additions and 50 deletions.
@@ -206,7 +206,14 @@ def prep_model_for_conversion(self, input_size: Union[tuple, list] = None, **kwa
        with convertible substitutes and remove all auxiliary or training related parts.
        :param input_size: [H,W]
        """
-       self.head.cache_anchors(input_size)
+
+       # There is some discrepancy in what input_size is:
+       # when exporting to ONNX it is passed as a 4-element tuple (B, C, H, W),
+       # but when called from predict() it is just (H, W).
+       # Taking the last two elements of the tuple handles both cases, but ultimately this should be fixed.
+       h, w = input_size[-2:]
+
+       self.head.cache_anchors((h, w))

        for module in self.modules():
            if isinstance(module, RepVGGBlock):
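For reference, here is a small standalone sketch of the same (B, C, H, W) vs. (H, W) normalization performed above; the helper name normalize_input_size is ours, not part of this commit.

from typing import Sequence, Tuple


def normalize_input_size(input_size: Sequence[int]) -> Tuple[int, int]:
    """Return (H, W) regardless of whether input_size is (H, W) or (B, C, H, W).

    Mirrors prep_model_for_conversion above: ONNX export passes a 4-element
    (B, C, H, W) tuple, while predict() passes only (H, W); taking the last two
    elements covers both cases.
    """
    if len(input_size) < 2:
        raise ValueError(f"input_size must contain at least 2 elements, got {input_size}")
    h, w = input_size[-2:]
    return int(h), int(w)


assert normalize_input_size((640, 640)) == (640, 640)
assert normalize_input_size((1, 3, 640, 640)) == (640, 640)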
@@ -3,6 +3,7 @@
from typing import Union, Type, List, Tuple, Optional
from functools import lru_cache

+import numpy as np
import torch
import torch.nn as nn

@@ -177,7 +178,7 @@ class DetectX(nn.Module):
    def __init__(
        self,
        num_classes: int,
-       stride: torch.Tensor,
+       stride: np.ndarray,
        activation_func_type: type,
        channels: list,
        depthwise=False,
@@ -203,7 +204,7 @@ def __init__(
        self.n_anchors = 1
        self.grid = [torch.zeros(1)] * self.detection_layers_num  # init grid

-       self.register_buffer("stride", stride)
+       self.register_buffer("stride", torch.tensor(stride), persistent=False)

        self.cls_convs = nn.ModuleList()
        self.reg_convs = nn.ModuleList()
@@ -409,7 +410,7 @@ def __init__(self, arch_params):
        )  # 24

        self._shortcuts = nn.ModuleList([CrossModelSkipConnection() for _ in range(len(self._skip_connections_dict.keys()) - 1)])
-       self.anchors = anchors
+
        self.width_mult = width_mult

    def forward(self, intermediate_output):
@@ -481,6 +482,7 @@ def __init__(self, backbone: Type[nn.Module], arch_params: HpmStruct, initialize
        self._image_processor: Optional[Processing] = None
        self._default_nms_iou: Optional[float] = None
        self._default_nms_conf: Optional[float] = None
+       self.register_buffer("strides", torch.tensor(self.arch_params.anchors.stride), persistent=False)

    @staticmethod
    def get_post_prediction_callback(conf: float, iou: float) -> DetectionPostPredictionCallback:
@@ -617,8 +619,6 @@ def _check_strides(self):
        if not torch.equal(m.stride, stride):
            raise RuntimeError("Provided anchor strides do not match the model strides")

-       self.register_buffer("stride", m.stride)  # USED ONLY FOR CONVERSION
-
    def _initialize_biases(self):
        """initialize biases into DetectX()"""
        detect_module = self._head._modules_list[-1]  # DetectX() module
@@ -650,7 +650,7 @@ def prep_model_for_conversion(self, input_size: Union[tuple, list] = None, **kwa
        assert not self.training, "model has to be in eval mode to be converted"

        # Verify that dummy_input from the converter is a multiple of the grid size
-       max_stride = int(max(self.stride))
+       max_stride = int(max(self.strides))

        # Validate the image size
        image_dims = input_size[-2:]  # assume torch uses channels first layout
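The stride buffers above are registered with persistent=False, so they live on the module (and follow .to()/.cuda()) but are not written to, or expected in, the state_dict; this is part of what keeps old YoloX checkpoints loadable. A minimal sketch of that mechanism on a toy module (not SuperGradients code):

import torch
import torch.nn as nn


class ToyHead(nn.Module):
    def __init__(self, strides):
        super().__init__()
        # persistent=False: the buffer behaves like any other buffer at runtime,
        # but it is excluded from state_dict(), so checkpoints saved with or
        # without it remain compatible.
        self.register_buffer("stride", torch.tensor(strides, dtype=torch.float32), persistent=False)
        self.proj = nn.Linear(4, 4)


head = ToyHead([8, 16, 32])
assert "stride" not in head.state_dict()                 # buffer is not serialized
head.load_state_dict(ToyHead([8, 16, 32]).state_dict())  # and is not required when loading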
1,310 changes: 1,278 additions & 32 deletions src/super_gradients/training/utils/checkpoint_utils.py

Large diffs are not rendered by default.
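Since the 1,310-line checkpoint_utils.py diff is not rendered, the actual solver code is not shown here. Purely as an illustration of the kind of key remapping such a checkpoint solver performs (the function below is ours and far simpler than the real maybe_remove_module_prefix / YoloX-specific logic), legacy state_dicts are rewritten to the current key layout before loading:

from typing import Dict, Mapping

import torch


def strip_module_prefix(state_dict: Mapping[str, torch.Tensor], prefix: str = "module.") -> Dict[str, torch.Tensor]:
    # Remove a leading prefix (e.g. left over from DataParallel) only if every key carries it.
    if all(key.startswith(prefix) for key in state_dict):
        return {key[len(prefix):]: value for key, value in state_dict.items()}
    return dict(state_dict)


# Usage sketch (hypothetical paths):
# checkpoint = torch.load("legacy_yolox_checkpoint.pth", map_location="cpu")
# model.load_state_dict(strip_module_prefix(checkpoint["net"]), strict=True)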

18 changes: 9 additions & 9 deletions src/super_gradients/training/utils/detection_utils.py
@@ -551,7 +551,7 @@ def visualize_batch(
        return out_images


-class Anchors(nn.Module):
+class Anchors:
    """
    A wrapper class to hold the anchors used by detection models such as Yolo
    """
@@ -568,15 +568,15 @@ def __init__(self, anchors_list: List[List], strides: List[int]):
        super().__init__()

        self.__anchors_list = anchors_list
-       self.__strides = strides
+       self.__strides = tuple(strides)

        self._check_all_lists(anchors_list)
        self._check_all_len_equal_and_even(anchors_list)

-       self._stride = nn.Parameter(torch.Tensor(strides).float(), requires_grad=False)
-       anchors = torch.Tensor(anchors_list).float().view(len(anchors_list), -1, 2)
-       self._anchors = nn.Parameter(anchors / self._stride.view(-1, 1, 1), requires_grad=False)
-       self._anchor_grid = nn.Parameter(anchors.clone().view(len(anchors_list), 1, -1, 1, 1, 2), requires_grad=False)
+       self._stride = np.array(strides, dtype=np.float32)
+       anchors = np.array(anchors_list, dtype=np.float32).reshape((len(anchors_list), -1, 2))
+       self._anchors = anchors / self._stride.reshape((-1, 1, 1))
+       self._anchor_grid = anchors.copy().reshape(len(anchors_list), 1, -1, 1, 1, 2)

    @staticmethod
    def _check_all_lists(anchors: list) -> bool:
@@ -592,15 +592,15 @@ def _check_all_len_equal_and_even(anchors: list) -> bool:
            raise RuntimeError("All objects of anchors_list must be of the same even length")

    @property
-   def stride(self) -> nn.Parameter:
+   def stride(self) -> np.ndarray:
        return self._stride

    @property
-   def anchors(self) -> nn.Parameter:
+   def anchors(self) -> np.ndarray:
        return self._anchors

    @property
-   def anchor_grid(self) -> nn.Parameter:
+   def anchor_grid(self) -> np.ndarray:
        return self._anchor_grid

    @property
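With this change Anchors is no longer an nn.Module, so strides and anchor grids become plain numpy arrays and never enter a model's state_dict. A condensed sketch of the resulting shape handling, using commonly cited COCO anchor values purely as example data:

import numpy as np

anchors_list = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]]
strides = [8, 16, 32]

stride = np.array(strides, dtype=np.float32)                                             # (3,)
anchors = np.array(anchors_list, dtype=np.float32).reshape((len(anchors_list), -1, 2))   # (3, 3, 2), in pixels
anchors_in_grid_units = anchors / stride.reshape((-1, 1, 1))                             # (3, 3, 2), per-stride units
anchor_grid = anchors.copy().reshape(len(anchors_list), 1, -1, 1, 1, 2)                  # (3, 1, 3, 1, 1, 2)

print(stride.shape, anchors_in_grid_units.shape, anchor_grid.shape)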
5 changes: 4 additions & 1 deletion tests/end_to_end_tests/trainer_test.py
@@ -39,7 +39,10 @@ def tearDownClass(cls) -> None:
        for experiment_name in cls.experiment_names:
            experiment_dir = get_checkpoints_dir_path(experiment_name=experiment_name)
            if os.path.isdir(experiment_dir):
-               shutil.rmtree(experiment_dir)
+               # TODO: Occasionally this method fails because log files are still open (see the setup_logging() call).
+               # TODO: We need a way to close them at the end of training; this is tricky to achieve
+               # TODO: because setup_logging() is called outside of the Trainer class.
+               shutil.rmtree(experiment_dir, ignore_errors=True)

    @staticmethod
    def get_classification_trainer(name=""):
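The TODO above notes that open log file handles are what occasionally break the cleanup. One possible direction (not part of this commit) is to close and detach file handlers before removing the directory, roughly like this:

import logging


def close_file_handlers(logger_name: str = None) -> None:
    # Close any FileHandler attached to the given logger (root logger by default)
    # so the files it holds open can be deleted on all platforms.
    logger = logging.getLogger(logger_name)
    for handler in list(logger.handlers):
        if isinstance(handler, logging.FileHandler):
            handler.close()
            logger.removeHandler(handler)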
2 changes: 1 addition & 1 deletion tests/unit_tests/export_detection_model_test.py
@@ -547,7 +547,7 @@ def manual_test_export_export_all_variants(self):
        # pass

        for model_type in [
-           # Models.YOLOX_S don't have full support for YOLOX so it's commented out,
+           Models.YOLOX_S,
            Models.PP_YOLOE_S,
            Models.YOLO_NAS_S,
        ]:
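With Models.YOLOX_S re-enabled in the export test above, the same conversion path can be exercised by hand. A rough sketch, assuming a COCO-pretrained model can be fetched and using plain torch.onnx.export rather than the test's export helpers:

import torch

from super_gradients.common.object_names import Models
from super_gradients.training import models

model = models.get(Models.YOLOX_S, pretrained_weights="coco").eval()

# Replace training-only parts with export-friendly substitutes
# (see prep_model_for_conversion in the diffs above).
model.prep_model_for_conversion(input_size=(1, 3, 640, 640))

dummy_input = torch.randn(1, 3, 640, 640)
torch.onnx.export(model, dummy_input, "yolox_s.onnx", opset_version=13)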
30 changes: 30 additions & 0 deletions tests/unit_tests/yolox_unit_test.py
@@ -1,7 +1,11 @@
+import os
+import tempfile
import unittest

+import torch
+
from super_gradients.common.object_names import Models
from super_gradients.training import models
from super_gradients.training.losses import YoloXDetectionLoss, YoloXFastDetectionLoss
from super_gradients.training.models.detection_models.yolox import YoloX_N, YoloX_T, YoloX_S, YoloX_M, YoloX_L, YoloX_X
from super_gradients.training.utils.detection_utils import DetectionCollateFN
@@ -69,6 +73,32 @@ def test_yolox_loss(self):
        result = loss(predictions, targets.to(device))
        print(result)

+    def test_yolo_x_checkpoint_solver(self):
+        """
+        This test checks that we can:
+        1. load old pretrained weights for YoloX that have non-matching keys (using a custom solver under the hood),
+        2. load a regular checkpoint (as if one had trained the model from scratch),
+        3. and verify that both models produce the same output.
+        :return:
+        """
+        model_variant = [Models.YOLOX_S, Models.YOLOX_M, Models.YOLOX_L, Models.YOLOX_T, Models.YOLOX_N]
+        for model_name in model_variant:
+            model = models.get(model_name, pretrained_weights="coco").eval()
+            input = torch.randn((1, 3, 320, 320))
+
+            output1 = model(input)
+
+            sd = model.state_dict()
+
+            with tempfile.TemporaryDirectory() as tmp_dirname:
+                path = os.path.join(tmp_dirname, f"{model_name}_coco.pth")
+                torch.save({"net": sd}, path)
+                model = models.get(model_name, num_classes=80, checkpoint_path=path).eval()
+                output2 = model(input)
+
+            assert torch.allclose(output1[0], output2[0], atol=1e-4)
+

if __name__ == "__main__":
unittest.main()
