Feature/sg 747 support predict video full pipeline master (#829)

* wip * move to imageprocessors * wip * add back changes * making it work fully for yolox and almost for ppyoloe * minor change * working for det * cleaning * clean * undo * replace empty with none * add _get_shift_params * minor doc change * cleaning wip * working for multiple images * add ppyoloe * replace pydantic with dataclasses and fix typing * add docstrings * doc improvement and use get_shift_params in transforms * add tests * improve comment * rename * wip * add option to keep ratio in rescale * make functions private * remove DetectionPaddedRescale * fix doc * big commit with wrong things * try undo bad change * doc * minor doc * add a lot of doc * fix comment * minor change * first draft of load_video * adding save_video, some parts are still to be checked * wip * add __init__.py to pipelines * replace size with shape * wip * cleaning * wip * fix rgb to bgr and remove check * almost working, missing batch * proposal of predict_video * wip working on dete * add yolox * add flag to visualize * update * add streaming * improve streaming code * docstring update * fix stream example * rename Results * cleaning * rename stream to predict_webcam * doc fixes * improve docstring and homogenize some names * rename _images_prediction_lst * improve doc * add doc * minor change * fix image * fix ci * fix merge * reverse channel properly
Deci-AI · Apr 17, 2023 · e83823f · e83823f
1 parent df9fdbc
commit e83823f
Show file tree

Hide file tree

Showing 22 changed files with 906 additions and 227 deletions.
diff --git a/documentation/source/images/examples/countryside.jpg b/documentation/source/images/examples/countryside.jpg
diff --git a/documentation/source/images/examples/street_busy.jpg b/documentation/source/images/examples/street_busy.jpg
diff --git a/documentation/source/images/examples/street_vehicles.jpg b/documentation/source/images/examples/street_vehicles.jpg
diff --git a/src/super_gradients/examples/predict/detection_predict.py b/src/super_gradients/examples/predict/detection_predict.py
@@ -5,9 +5,10 @@
 model = models.get(Models.PP_YOLOE_S, pretrained_weights="coco")
 
 IMAGES = [
-    "https://miro.medium.com/v2/resize:fit:500/0*w1s81z-Q72obhE_z",
-    "https://s.hs-data.com/bilder/spieler/gross/128069.jpg",
-    "https://datasets-server.huggingface.co/assets/Chris1/cityscapes/--/Chris1--cityscapes/train/28/image/image.jpg",
+    "../../../../documentation/source/images/examples/countryside.jpg",
+    "../../../../documentation/source/images/examples/street_busy.jpg",
+    "https://cdn-attachments.timesofmalta.com/cc1eceadde40d2940bc5dd20692901371622153217-1301777007-4d978a6f-620x348.jpg",
 ]
-prediction = model.predict(IMAGES, iou=0.65, conf=0.5)
+
+prediction = model.predict(IMAGES)
 prediction.show()
diff --git a/src/super_gradients/examples/predict/detection_predict_image_folder.py b/src/super_gradients/examples/predict/detection_predict_image_folder.py
@@ -0,0 +1,9 @@
+from super_gradients.common.object_names import Models
+from super_gradients.training import models
+
+# Note that currently only YoloX and PPYoloE are supported.
+model = models.get(Models.YOLOX_N, pretrained_weights="coco")
+
+# Here a folder path (not a list of images) is handed to `predict`.
+# NOTE(review): the path is relative, so this presumably has to be run from this example's directory - confirm.
+image_folder_path = "../../../../documentation/source/images/examples"
+predictions = model.predict(image_folder_path)
+predictions.show()
diff --git a/src/super_gradients/examples/predict/detection_predict_streaming.py b/src/super_gradients/examples/predict/detection_predict_streaming.py
@@ -0,0 +1,6 @@
+from super_gradients.common.object_names import Models
+from super_gradients.training import models
+
+# Note that currently only YoloX and PPYoloE are supported.
+model = models.get(Models.YOLOX_N, pretrained_weights="coco")
+# Predict using the webcam; no iou/conf is passed, so the model's default thresholds are used.
+model.predict_webcam()
diff --git a/src/super_gradients/examples/predict/detection_predict_video.py b/src/super_gradients/examples/predict/detection_predict_video.py
@@ -0,0 +1,9 @@
+from super_gradients.common.object_names import Models
+from super_gradients.training import models
+
+# Note that currently only YoloX and PPYoloE are supported.
+model = models.get(Models.YOLOX_N, pretrained_weights="coco")
+
+# Placeholder value: replace it with the path of the video to run the model on.
+video_path = "<path/to/your/video>"
+predictions = model.predict(video_path)
+predictions.show()
diff --git a/src/super_gradients/training/datasets/detection_datasets/yolo_format_detection.py b/src/super_gradients/training/datasets/detection_datasets/yolo_format_detection.py
@@ -5,7 +5,7 @@
 from typing import List, Optional
 
 from super_gradients.common.abstractions.abstract_logger import get_logger
-from super_gradients.training.utils.load_image import is_image
+from super_gradients.training.utils.media.image import is_image
 from super_gradients.training.datasets.detection_datasets.detection_dataset import DetectionDataset
 from super_gradients.training.datasets.data_formats import ConcatenatedTensorFormatConverter
 from super_gradients.training.datasets.data_formats.default_formats import XYXY_LABEL, LABEL_NORMALIZED_CXCYWH

diff --git a/src/super_gradients/training/models/detection_models/customizable_detector.py b/src/super_gradients/training/models/detection_models/customizable_detector.py
@@ -5,16 +5,19 @@
  * each module accepts in_channels and other parameters
  * each module defines out_channels property on construction
 """
-
-
-from typing import Union, Optional
+from typing import Union, Optional, List
 
 from torch import nn
 from omegaconf import DictConfig
 
 from super_gradients.training.utils.utils import HpmStruct
 from super_gradients.training.models.sg_module import SgModule
 import super_gradients.common.factories.detection_modules_factory as det_factory
+from super_gradients.training.models.prediction_results import ImagesDetectionPrediction
+from super_gradients.training.pipelines.pipelines import DetectionPipeline
+from super_gradients.training.transforms.processing import Processing
+from super_gradients.training.utils.detection_utils import DetectionPostPredictionCallback
+from super_gradients.training.utils.media.image import ImageSource
 
 
 class CustomizableDetector(SgModule):
@@ -67,6 +70,12 @@ def __init__(
 
         self._initialize_weights(bn_eps, bn_momentum, inplace_act)
 
+        # Processing params
+        self._class_names: Optional[List[str]] = None
+        self._image_processor: Optional[Processing] = None
+        self._default_nms_iou: Optional[float] = None
+        self._default_nms_conf: Optional[float] = None
+
     def forward(self, x):
         x = self.backbone(x)
         x = self.neck(x)
@@ -96,3 +105,70 @@ def replace_head(self, new_num_classes: Optional[int] = None, new_head: Optional
             self.heads_params = factory.insert_module_param(self.heads_params, "num_classes", new_num_classes)
             self.heads = factory.get(factory.insert_module_param(self.heads_params, "in_channels", self.neck.out_channels))
             self._initialize_weights(self.bn_eps, self.bn_momentum, self.inplace_act)
+
+    @staticmethod
+    def get_post_prediction_callback(conf: float, iou: float) -> DetectionPostPredictionCallback:
+        """Return the post-prediction callback to use with the given thresholds.
+
+        Not implemented in this class: calling it always raises.
+        NOTE(review): `_get_pipeline` calls this method, so concrete detectors presumably have to override it - confirm.
+
+        :param conf:    Confidence threshold.
+        :param iou:     IoU threshold.
+        :raises NotImplementedError: Always.
+        """
+        raise NotImplementedError
+
+    def set_dataset_processing_params(
+        self,
+        class_names: Optional[List[str]] = None,
+        image_processor: Optional[Processing] = None,
+        iou: Optional[float] = None,
+        conf: Optional[float] = None,
+    ) -> None:
+        """Set the processing parameters for the dataset.
+
+        :param class_names:     (Optional) Names of the dataset the model was trained on.
+        :param image_processor: (Optional) Image processing objects to reproduce the dataset preprocessing used for training.
+        :param iou:             (Optional) IoU threshold for the nms algorithm
+        :param conf:            (Optional) Below the confidence threshold, prediction are discarded
+        """
+        self._class_names = class_names or self._class_names
+        self._image_processor = image_processor or self._image_processor
+        self._default_nms_iou = iou or self._default_nms_iou
+        self._default_nms_conf = conf or self._default_nms_conf
+
+    def _get_pipeline(self, iou: Optional[float] = None, conf: Optional[float] = None) -> DetectionPipeline:
+        """Instantiate the prediction pipeline of this model.
+
+        :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
+        :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
+                        If None, the default value associated to the training is used.
+        """
+        if None in (self._class_names, self._image_processor, self._default_nms_iou, self._default_nms_conf):
+            raise RuntimeError(
+                "You must set the dataset processing parameters before calling predict.\n" "Please call `model.set_dataset_processing_params(...)` first."
+            )
+
+        iou = iou or self._default_nms_iou
+        conf = conf or self._default_nms_conf
+
+        pipeline = DetectionPipeline(
+            model=self,
+            image_processor=self._image_processor,
+            post_prediction_callback=self.get_post_prediction_callback(iou=iou, conf=conf),
+            class_names=self._class_names,
+        )
+        return pipeline
+
+    def predict(self, images: ImageSource, iou: Optional[float] = None, conf: Optional[float] = None) -> ImagesDetectionPrediction:
+        """Predict an image or a list of images.
+
+        :param images:  Images to predict.
+        :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
+        :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
+                        If None, the default value associated to the training is used.
+        """
+        pipeline = self._get_pipeline(iou=iou, conf=conf)
+        return pipeline(images)  # type: ignore
+
+    def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = None):
+        """Predict using webcam.
+
+        :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
+        :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
+                        If None, the default value associated to the training is used.
+        """
+        pipeline = self._get_pipeline(iou=iou, conf=conf)
+        pipeline.predict_webcam()
diff --git a/src/super_gradients/training/models/detection_models/pp_yolo_e/pp_yolo_e.py b/src/super_gradients/training/models/detection_models/pp_yolo_e/pp_yolo_e.py
@@ -12,9 +12,10 @@
 from super_gradients.training.utils import HpmStruct
 from super_gradients.training.models.arch_params_factory import get_arch_params
 from super_gradients.training.models.detection_models.pp_yolo_e.post_prediction_callback import PPYoloEPostPredictionCallback, DetectionPostPredictionCallback
-from super_gradients.training.models.results import DetectionResults
+from super_gradients.training.models.prediction_results import ImagesDetectionPrediction
 from super_gradients.training.pipelines.pipelines import DetectionPipeline
 from super_gradients.training.transforms.processing import Processing
+from super_gradients.training.utils.media.image import ImageSource
 
 
 class PPYoloE(SgModule):
@@ -29,34 +30,75 @@ def __init__(self, arch_params):
 
         self._class_names: Optional[List[str]] = None
         self._image_processor: Optional[Processing] = None
+        self._default_nms_iou: Optional[float] = None
+        self._default_nms_conf: Optional[float] = None
 
     @staticmethod
     def get_post_prediction_callback(conf: float, iou: float) -> DetectionPostPredictionCallback:
         """Build the PPYoloE post-prediction callback for the given thresholds.
 
         :param conf:    Value forwarded as `score_threshold`.
         :param iou:     Value forwarded as `nms_threshold`.
         """
         callback = PPYoloEPostPredictionCallback(score_threshold=conf, nms_threshold=iou, nms_top_k=1000, max_predictions=300)
         return callback
 
-    def set_dataset_processing_params(self, class_names: Optional[List[str]], image_processor: Optional[Processing]) -> None:
+    def set_dataset_processing_params(
+        self,
+        class_names: Optional[List[str]] = None,
+        image_processor: Optional[Processing] = None,
+        iou: Optional[float] = None,
+        conf: Optional[float] = None,
+    ) -> None:
         """Set the processing parameters for the dataset.
 
         :param class_names:     (Optional) Names of the dataset the model was trained on.
         :param image_processor: (Optional) Image processing objects to reproduce the dataset preprocessing used for training.
+        :param iou:             (Optional) IoU threshold for the nms algorithm
+        :param conf:            (Optional) Below the confidence threshold, prediction are discarded
         """
         self._class_names = class_names or self._class_names
         self._image_processor = image_processor or self._image_processor
+        self._default_nms_iou = iou or self._default_nms_iou
+        self._default_nms_conf = conf or self._default_nms_conf
 
-    def predict(self, images, iou: float = 0.65, conf: float = 0.01) -> DetectionResults:
+    def _get_pipeline(self, iou: Optional[float] = None, conf: Optional[float] = None) -> DetectionPipeline:
+        """Instantiate the prediction pipeline of this model.
 
-        if self._class_names is None or self._image_processor is None:
+        :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
+        :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
+                        If None, the default value associated to the training is used.
+        """
+        if None in (self._class_names, self._image_processor, self._default_nms_iou, self._default_nms_conf):
             raise RuntimeError(
                 "You must set the dataset processing parameters before calling predict.\n" "Please call `model.set_dataset_processing_params(...)` first."
             )
 
+        iou = iou or self._default_nms_iou
+        conf = conf or self._default_nms_conf
+
         pipeline = DetectionPipeline(
             model=self,
             image_processor=self._image_processor,
             post_prediction_callback=self.get_post_prediction_callback(iou=iou, conf=conf),
             class_names=self._class_names,
         )
-        return pipeline(images)
+        return pipeline
+
+    def predict(self, images: ImageSource, iou: Optional[float] = None, conf: Optional[float] = None) -> ImagesDetectionPrediction:
+        """Predict an image or a list of images.
+
+        :param images:  Images to predict.
+        :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
+        :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
+                        If None, the default value associated to the training is used.
+        """
+        pipeline = self._get_pipeline(iou=iou, conf=conf)
+        return pipeline(images)  # type: ignore
+
+    def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = None):
+        """Predict using webcam.
+
+        :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
+        :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
+                        If None, the default value associated to the training is used.
+        """
+        pipeline = self._get_pipeline(iou=iou, conf=conf)
+        pipeline.predict_webcam()
 
     def forward(self, x: Tensor):
         features = self.backbone(x)

diff --git a/src/super_gradients/training/models/detection_models/yolo_base.py b/src/super_gradients/training/models/detection_models/yolo_base.py
@@ -11,10 +11,10 @@
 from super_gradients.training.utils import torch_version_is_greater_or_equal
 from super_gradients.training.utils.detection_utils import non_max_suppression, matrix_non_max_suppression, NMS_Type, DetectionPostPredictionCallback, Anchors
 from super_gradients.training.utils.utils import HpmStruct, check_img_size_divisibility, get_param
-from super_gradients.training.models.results import DetectionResults
+from super_gradients.training.models.prediction_results import ImagesDetectionPrediction
 from super_gradients.training.pipelines.pipelines import DetectionPipeline
 from super_gradients.training.transforms.processing import Processing
-
+from super_gradients.training.utils.media.image import ImageSource
 
 COCO_DETECTION_80_CLASSES_BBOX_ANCHORS = Anchors(
     [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]], strides=[8, 16, 32]
@@ -418,33 +418,75 @@ def __init__(self, backbone: Type[nn.Module], arch_params: HpmStruct, initialize
 
         self._class_names: Optional[List[str]] = None
         self._image_processor: Optional[Processing] = None
+        self._default_nms_iou: Optional[float] = None
+        self._default_nms_conf: Optional[float] = None
 
     @staticmethod
     def get_post_prediction_callback(conf: float, iou: float) -> DetectionPostPredictionCallback:
         """Build the Yolo post-prediction callback for the given thresholds.
 
         :param conf:    Value forwarded as `conf`.
         :param iou:     Value forwarded as `iou`.
         """
         callback = YoloPostPredictionCallback(conf=conf, iou=iou)
         return callback
 
-    def set_dataset_processing_params(self, class_names: Optional[List[str]], image_processor: Optional[Processing]) -> None:
+    def set_dataset_processing_params(
+        self,
+        class_names: Optional[List[str]] = None,
+        image_processor: Optional[Processing] = None,
+        iou: Optional[float] = None,
+        conf: Optional[float] = None,
+    ) -> None:
         """Set the processing parameters for the dataset.
 
         :param class_names:     (Optional) Names of the dataset the model was trained on.
         :param image_processor: (Optional) Image processing objects to reproduce the dataset preprocessing used for training.
+        :param iou:             (Optional) IoU threshold for the nms algorithm
+        :param conf:            (Optional) Below the confidence threshold, prediction are discarded
         """
         self._class_names = class_names or self._class_names
         self._image_processor = image_processor or self._image_processor
+        self._default_nms_iou = iou or self._default_nms_iou
+        self._default_nms_conf = conf or self._default_nms_conf
+
+    def _get_pipeline(self, iou: Optional[float] = None, conf: Optional[float] = None) -> DetectionPipeline:
+        """Instantiate the prediction pipeline of this model.
 
-    def predict(self, images, iou: float = 0.65, conf: float = 0.01) -> DetectionResults:
-        if self._class_names is None or self._image_processor is None:
+        :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
+        :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
+                        If None, the default value associated to the training is used.
+        """
+        if None in (self._class_names, self._image_processor, self._default_nms_iou, self._default_nms_conf):
             raise RuntimeError(
                 "You must set the dataset processing parameters before calling predict.\n" "Please call `model.set_dataset_processing_params(...)` first."
             )
 
+        iou = iou or self._default_nms_iou
+        conf = conf or self._default_nms_conf
+
         pipeline = DetectionPipeline(
             model=self,
             image_processor=self._image_processor,
             post_prediction_callback=self.get_post_prediction_callback(iou=iou, conf=conf),
             class_names=self._class_names,
         )
-        return pipeline(images)
+        return pipeline
+
+    def predict(self, images: ImageSource, iou: Optional[float] = None, conf: Optional[float] = None) -> ImagesDetectionPrediction:
+        """Predict an image or a list of images.
+
+        :param images:  Images to predict.
+        :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
+        :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
+                        If None, the default value associated to the training is used.
+        """
+        pipeline = self._get_pipeline(iou=iou, conf=conf)
+        return pipeline(images)  # type: ignore
+
+    def predict_webcam(self, iou: Optional[float] = None, conf: Optional[float] = None):
+        """Predict using webcam.
+
+        :param iou:     (Optional) IoU threshold for the nms algorithm. If None, the default value associated to the training is used.
+        :param conf:    (Optional) Below the confidence threshold, prediction are discarded.
+                        If None, the default value associated to the training is used.
+        """
+        pipeline = self._get_pipeline(iou=iou, conf=conf)
+        pipeline.predict_webcam()
 
     def forward(self, x):
         out = self._backbone(x)

diff --git a/src/super_gradients/training/models/model_factory.py b/src/super_gradients/training/models/model_factory.py
@@ -136,8 +136,9 @@ def instantiate_model(
                 net.replace_head(new_num_classes=num_classes_new_head)
                 arch_params.num_classes = num_classes_new_head
 
-            class_names, image_processor = get_pretrained_processing_params(model_name, pretrained_weights)
-            net.set_dataset_processing_params(class_names, image_processor)
+            # TODO: remove once we load it from the checkpoint
+            processing_params = get_pretrained_processing_params(model_name, pretrained_weights)
+            net.set_dataset_processing_params(**processing_params)
 
     _add_model_name_attribute(net, model_name)