Feature/sg 000 propagate imagenet dataset params (#1368)

* Propagate default dataset processing params for other classification models
* Fix bug in predict pipeline (softmax was computed along the batch dimension AFTER taking the max along the classes dimension)
* Added more classification models to test
Deci-AI · Aug 11, 2023 · b6499b6 · b6499b6
1 parent fb01f1f
commit b6499b6
Show file tree

Hide file tree

Showing 3 changed files with 30 additions and 15 deletions.
diff --git a/src/super_gradients/training/pipelines/pipelines.py b/src/super_gradients/training/pipelines/pipelines.py
@@ -24,7 +24,6 @@
     ImagesClassificationPrediction,
     ClassificationPrediction,
 )
-from torch.nn.functional import softmax
 from super_gradients.training.utils.utils import generate_batch
 from super_gradients.training.utils.media.video import load_video, includes_video_extension
 from super_gradients.training.utils.media.image import ImageSource, check_image_typing
@@ -410,17 +409,17 @@ def __init__(
     def _decode_model_output(self, model_output: Union[List, Tuple, torch.Tensor], model_input: np.ndarray) -> List[ClassificationPrediction]:
         """Decode the model output
 
-        :param model_output:    Direct output of the model, without any post-processing.
+        :param model_output:    Direct output of the model, without any post-processing. Tensor of shape [B, C]
         :param model_input:     Model input (i.e. images after preprocessing).
         :return:                Predicted Bboxes.
         """
-        confidence_predictions, classifier_predictions = torch.max(model_output, 1)
+        pred_scores, pred_labels = torch.max(model_output.softmax(dim=1), 1)
 
-        classifier_predictions = classifier_predictions.detach().cpu().numpy()
-        confidence_predictions = softmax(confidence_predictions).detach().cpu().numpy()
+        pred_labels = pred_labels.detach().cpu().numpy()  # [B]
+        pred_scores = pred_scores.detach().cpu().numpy()  # [B]
 
         predictions = list()
-        for prediction, confidence, image_input in zip(classifier_predictions, confidence_predictions, model_input):
+        for prediction, confidence, image_input in zip(pred_labels, pred_scores, model_input):
             predictions.append(ClassificationPrediction(confidence=float(confidence), label=int(prediction), image_shape=image_input.shape))
         return predictions
 

diff --git a/src/super_gradients/training/processing/processing.py b/src/super_gradients/training/processing/processing.py
@@ -641,10 +641,22 @@ def default_dekr_coco_processing_params() -> dict:
     return params
 
 
-def default_resnet_imagenet_processing_params() -> dict:
+def default_imagenet_processing_params() -> dict:
     """Processing parameters commonly used for training resnet on Imagenet dataset."""
     image_processor = ComposeProcessing(
-        [Resize(size=256), CenterCrop(size=224), NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), StandardizeImage(), ImagePermute()]
+        [Resize(size=256), CenterCrop(size=224), StandardizeImage(), NormalizeImage(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]), ImagePermute()]
+    )
+    params = dict(
+        class_names=IMAGENET_CLASSES,
+        image_processor=image_processor,
+    )
+    return params
+
+
+def default_vit_imagenet_processing_params() -> dict:
+    """Processing parameters commonly used for training ViT models on the Imagenet dataset."""
+    image_processor = ComposeProcessing(
+        [Resize(size=256), CenterCrop(size=224), StandardizeImage(), NormalizeImage(mean=[0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5]), ImagePermute()]
     )
     params = dict(
         class_names=IMAGENET_CLASSES,
@@ -668,7 +680,10 @@ def get_pretrained_processing_params(model_name: str, pretrained_weights: str) -
     if pretrained_weights == "coco_pose" and model_name in ("dekr_w32_no_dc", "dekr_custom"):
         return default_dekr_coco_processing_params()
 
-    if pretrained_weights == "imagenet" and model_name == "resnet18":
-        return default_resnet_imagenet_processing_params()
+    if pretrained_weights == "imagenet" and model_name in {"vit_base", "vit_large", "vit_huge"}:
+        return default_vit_imagenet_processing_params()
+
+    if pretrained_weights == "imagenet":
+        return default_imagenet_processing_params()
 
     return dict()
diff --git a/tests/unit_tests/test_predict.py b/tests/unit_tests/test_predict.py
@@ -16,12 +16,13 @@ def setUp(self) -> None:
         ]
 
     def test_classification_models(self):
-        model = models.get(Models.RESNET18, pretrained_weights="imagenet")
-
         with tempfile.TemporaryDirectory() as tmp_dirname:
-            predictions = model.predict(self.images)
-            predictions.show()
-            predictions.save(output_folder=tmp_dirname)
+            for model_name in {Models.RESNET18, Models.EFFICIENTNET_B0, Models.MOBILENET_V2, Models.REGNETY200}:
+                model = models.get(model_name, pretrained_weights="imagenet")
+
+                predictions = model.predict(self.images)
+                predictions.show()
+                predictions.save(output_folder=tmp_dirname)
 
     def test_pose_estimation_models(self):
         model = models.get(Models.DEKR_W32_NO_DC, pretrained_weights="coco_pose")