Update Rescale transform (#46)

* Add support for scale factor in resize transform
* Fix resize for RLE masks
* Update changelog
cvat-ai · Jul 1, 2024 · 5b8ef82 · 5b8ef82
1 parent 453c891
commit 5b8ef82
Show file tree

Hide file tree

Showing 3 changed files with 130 additions and 22 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -41,6 +41,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/cvat-ai/datumaro/pull/8>)
 - Functions to work with plain polygons (COCO-style) - `close_polygon`, `simplify_polygon`
   (<https://github.com/cvat-ai/datumaro/pull/39>)
+- An option to specify scale factor in `resize` transform
+  (<https://github.com/cvat-ai/datumaro/pull/46>)
 
 ### Changed
 - `env.detect_dataset()` now returns a list of detected formats at all recursion levels
@@ -96,6 +98,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/cvat-ai/datumaro/pull/41>)
 - `Dataset.get()` could ignore existing transforms in the dataset
   (<https://github.com/cvat-ai/datumaro/pull/45>)
+- Failing `resize` transform for RLE masks
+  (<https://github.com/cvat-ai/datumaro/pull/46>)
 
 ### Security
 - TBD

diff --git a/datumaro/plugins/transforms.py b/datumaro/plugins/transforms.py
@@ -1,5 +1,5 @@
 # Copyright (C) 2020-2022 Intel Corporation
-# Copyright (C) 2022 CVAT.ai Corporation
+# Copyright (C) 2022-2024 CVAT.ai Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -14,7 +14,7 @@
 from copy import deepcopy
 from enum import Enum, auto
 from itertools import chain
-from typing import Dict, Iterable, List, Optional, Tuple, Union
+from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union
 
 import cv2
 import numpy as np
@@ -922,30 +922,55 @@ def transform_item(self, item):
 
 class ResizeTransform(ItemTransform):
     """
-    Resizes images and annotations in the dataset to the specified size.
+    Resizes images and annotations in the dataset to the specified size or by the specified factor.
     Supports upscaling, downscaling and mixed variants.|n
     |n
     Examples:|n
         - Resize all images to 256x256 size|n
 
         .. code-block::
 
-        |s|s%(prog)s -dw 256 -dh 256
+        |s|s%(prog)s -dw 256 -dh 256|n
+        |n
+        - Scale all images 2x by each side|n
+
+        .. code-block::
+
+        |s|s%(prog)s -sx 2 -sy 2
     """
 
     @classmethod
     def build_cmdline_parser(cls, **kwargs):
         parser = super().build_cmdline_parser(**kwargs)
         parser.add_argument("-dw", "--width", type=int, help="Destination image width")
         parser.add_argument("-dh", "--height", type=int, help="Destination image height")
+        parser.add_argument("-sx", "--scale-x", type=float, help="Scale factor for the x axis")
+        parser.add_argument("-sy", "--scale-y", type=float, help="Scale factor for the y axis")
         return parser
 
-    def __init__(self, extractor: IExtractor, width: int, height: int) -> None:
+    def __init__(
+        self,
+        extractor: IExtractor,
+        *,
+        width: Optional[int] = None,
+        height: Optional[int] = None,
+        scale_x: Optional[float] = None,
+        scale_y: Optional[float] = None,
+    ) -> None:
         super().__init__(extractor)
 
-        assert width > 0 and height > 0
+        width = width or 0
+        height = height or 0
+        scale_x = scale_x or 0
+        scale_y = scale_y or 0
+        assert (width > 0 and height > 0) ^ (
+            scale_x > 0 and scale_y > 0
+        ), "width, height, scale_x, scale_y cannot be used together"
+
         self._width = width
         self._height = height
+        self._scale_x = scale_x
+        self._scale_y = scale_y
 
     @staticmethod
     def _lazy_resize_image(image, new_size):
@@ -975,17 +1000,34 @@ def _resize_image():
 
         return _resize_image
 
+    @staticmethod
+    def _lazy_rle_encode(
+        lazy_mask: Callable[[], mask_tools.BinaryMask]
+    ) -> Callable[[], mask_tools.CompressedRle]:
+        def _lazy_encode():
+            mask = lazy_mask()
+            h, w = mask.shape[:2]
+            return mask_tools.to_uncompressed_rle(mask_tools.mask_to_rle(mask), width=w, height=h)
+
+        return _lazy_encode
+
     def transform_item(self, item):
         if not isinstance(item.media, Image):
-            raise DatumaroError(
-                "Item %s: image info is required for this " "transform" % (item.id,)
-            )
+            raise DatumaroError("Item %s: image info is required for this transform" % (item.id,))
 
         h, w = item.media.size
-        xscale = self._width / float(w)
-        yscale = self._height / float(h)
+        if self._width and self._height:
+            xscale = self._width / float(w)
+            yscale = self._height / float(h)
+            new_size = (self._height, self._width)
+        elif self._scale_x and self._scale_y:
+            xscale = self._scale_x
+            yscale = self._scale_y
+            new_size = (round(h * self._scale_y), round(w * self._scale_x))
+        else:
+            assert False, "Unexpected scale configuration"
 
-        new_size = (self._height, self._width)
+        new_size = tuple(map(int, new_size))
 
         resized_image = None
         if item.media.has_data:
@@ -1013,8 +1055,11 @@ def transform_item(self, item):
                     )
                 )
             elif isinstance(ann, Mask):
-                rescaled_mask = self._lazy_resize_mask(ann, new_size)
-                resized_annotations.append(ann.wrap(image=rescaled_mask))
+                lazy_mask = self._lazy_resize_mask(ann, new_size)
+                if isinstance(ann, RleMask):
+                    resized_annotations.append(ann.wrap(rle=self._lazy_rle_encode(lazy_mask)))
+                else:
+                    resized_annotations.append(ann.wrap(image=lazy_mask))
             elif isinstance(ann, (Caption, Label)):
                 resized_annotations.append(ann)
             else:

diff --git a/tests/test_transforms.py b/tests/test_transforms.py
@@ -19,6 +19,7 @@
     PointsCategories,
     Polygon,
     PolyLine,
+    RleMask,
 )
 from datumaro.components.dataset import Dataset
 from datumaro.components.errors import DatumaroError
@@ -797,7 +798,7 @@ def test_bboxes_values_decrement_transform(self):
     @mark_requirement(Requirements.DATUM_GENERAL_REQ)
     @mark_bug(Requirements.DATUM_BUG_618)
     def test_can_resize(self):
-        small_dataset = Dataset.from_iterable(
+        small_frame_dataset = Dataset.from_iterable(
             [
                 DatasetItem(
                     id=i,
@@ -816,7 +817,25 @@ def test_can_resize(self):
                                     [0, 1, 1, 0],
                                     [1, 1, 0, 0],
                                 ]
-                            )
+                            ),
+                            label=1,
+                        ),
+                        RleMask(
+                            rle=mask_tools.to_uncompressed_rle(
+                                mask_tools.mask_to_rle(
+                                    np.array(
+                                        [
+                                            [1, 1, 1, 0],
+                                            [1, 1, 0, 1],
+                                            [0, 1, 0, 1],
+                                            [1, 0, 0, 1],
+                                        ]
+                                    )
+                                ),
+                                width=4,
+                                height=4,
+                            ),
+                            label=2,
                         ),
                     ],
                 )
@@ -825,7 +844,7 @@ def test_can_resize(self):
             categories=["a", "b", "c"],
         )
 
-        big_dataset = Dataset.from_iterable(
+        big_frame_dataset = Dataset.from_iterable(
             [
                 DatasetItem(
                     id=i,
@@ -848,7 +867,23 @@ def test_can_resize(self):
                                     [1, 1, 1, 1, 0, 0, 0, 0],
                                     [1, 1, 1, 1, 0, 0, 0, 0],
                                 ]
-                            )
+                            ),
+                            label=1,
+                        ),
+                        Mask(
+                            np.array(
+                                [
+                                    [1, 1, 1, 1, 1, 1, 0, 0],
+                                    [1, 1, 1, 1, 1, 1, 0, 0],
+                                    [1, 1, 1, 1, 0, 0, 1, 1],
+                                    [1, 1, 1, 1, 0, 0, 1, 1],
+                                    [0, 0, 1, 1, 0, 0, 1, 1],
+                                    [0, 0, 1, 1, 0, 0, 1, 1],
+                                    [1, 1, 0, 0, 0, 0, 1, 1],
+                                    [1, 1, 0, 0, 0, 0, 1, 1],
+                                ]
+                            ),
+                            label=2,
                         ),
                     ],
                 )
@@ -858,12 +893,36 @@ def test_can_resize(self):
         )
 
         with self.subTest("upscale"):
-            actual = transforms.ResizeTransform(small_dataset, width=8, height=8)
-            compare_datasets(self, big_dataset, actual)
+            for params in [{"width": 8, "height": 8}, {"scale_x": 2, "scale_y": 2}]:
+                with self.subTest(params=params):
+                    actual = transforms.ResizeTransform(small_frame_dataset, **params)
+                    compare_datasets(self, big_frame_dataset, actual)
 
         with self.subTest("downscale"):
-            actual = transforms.ResizeTransform(big_dataset, width=4, height=4)
-            compare_datasets(self, small_dataset, actual)
+            for params in [{"width": 4, "height": 4}, {"scale_x": 0.5, "scale_y": 0.5}]:
+                with self.subTest(params=params):
+                    actual = transforms.ResizeTransform(big_frame_dataset, **params)
+                    compare_datasets(self, small_frame_dataset, actual)
+
+    def test_can_use_only_1_set_of_resize_parameters(self):
+        absolute_params = {"width": 6, "height": 2}
+        relative_params = {"scale_x": 3, "scale_y": 0.2}
+
+        input_dataset = Dataset.from_iterable([DatasetItem(id=1, media=Image(np.ones((10, 2))))])
+        expected = Dataset.from_iterable([DatasetItem(id=1, media=Image(np.ones((2, 6))))])
+
+        with self.subTest(params=absolute_params):
+            actual = transforms.ResizeTransform(input_dataset, **absolute_params)
+            compare_datasets(self, expected, actual)
+
+        with self.subTest(params=relative_params):
+            actual = transforms.ResizeTransform(input_dataset, **relative_params)
+            compare_datasets(self, expected, actual)
+
+        params = dict(**absolute_params, **relative_params)
+        with self.subTest(params=params):
+            with self.assertRaisesRegex(Exception, "cannot be used together"):
+                transforms.ResizeTransform(input_dataset, **params)
 
     @mark_bug(Requirements.DATUM_BUG_606)
     def test_can_keep_image_ext_on_resize(self):