Skip to content

Commit

Permalink
Update Rescale transform (#46)
Browse files Browse the repository at this point in the history
* Add support for scale factor in resize transform

* Fix resize for rle masks

* Update changelog
  • Loading branch information
zhiltsov-max committed Jul 1, 2024
1 parent 453c891 commit 5b8ef82
Show file tree
Hide file tree
Showing 3 changed files with 130 additions and 22 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/cvat-ai/datumaro/pull/8>)
- Functions to work with plain polygons (COCO-style) - `close_polygon`, `simplify_polygon`
(<https://github.com/cvat-ai/datumaro/pull/39>)
- An option to specify a scale factor in the `resize` transform
(<https://github.com/cvat-ai/datumaro/pull/46>)

### Changed
- `env.detect_dataset()` now returns a list of detected formats at all recursion levels
Expand Down Expand Up @@ -96,6 +98,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
(<https://github.com/cvat-ai/datumaro/pull/41>)
- `Dataset.get()` could ignore existing transforms in the dataset
(<https://github.com/cvat-ai/datumaro/pull/45>)
- Failing `resize` transform for RLE masks
(<https://github.com/cvat-ai/datumaro/pull/46>)

### Security
- TBD
Expand Down
73 changes: 59 additions & 14 deletions datumaro/plugins/transforms.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# Copyright (C) 2020-2022 Intel Corporation
# Copyright (C) 2022 CVAT.ai Corporation
# Copyright (C) 2022-2024 CVAT.ai Corporation
#
# SPDX-License-Identifier: MIT

Expand All @@ -14,7 +14,7 @@
from copy import deepcopy
from enum import Enum, auto
from itertools import chain
from typing import Dict, Iterable, List, Optional, Tuple, Union
from typing import Callable, Dict, Iterable, List, Optional, Tuple, Union

import cv2
import numpy as np
Expand Down Expand Up @@ -922,30 +922,55 @@ def transform_item(self, item):

class ResizeTransform(ItemTransform):
"""
Resizes images and annotations in the dataset to the specified size.
Resizes images and annotations in the dataset to the specified size or by the specified factor.
Supports upscaling, downscaling and mixed variants.|n
|n
Examples:|n
- Resize all images to 256x256 size|n
.. code-block::
|s|s%(prog)s -dw 256 -dh 256
|s|s%(prog)s -dw 256 -dh 256|n
|n
- Scale all images 2x by each side|n
.. code-block::
|s|s%(prog)s -sx 2 -sy 2
"""

@classmethod
def build_cmdline_parser(cls, **kwargs):
    """
    Builds the command-line parser for this transform.

    Starts from the parser returned by the parent class and registers the
    options for an absolute destination size (-dw / -dh) and for per-axis
    scale factors (-sx / -sy). None of the options is given a default here.
    """
    # (option flags, value type, help text), listed in registration order
    option_specs = (
        (("-dw", "--width"), int, "Destination image width"),
        (("-dh", "--height"), int, "Destination image height"),
        (("-sx", "--scale-x"), float, "Scale factor for the x axis"),
        (("-sy", "--scale-y"), float, "Scale factor for the y axis"),
    )

    cmdline_parser = super().build_cmdline_parser(**kwargs)
    for flags, value_type, help_text in option_specs:
        cmdline_parser.add_argument(*flags, type=value_type, help=help_text)
    return cmdline_parser

def __init__(self, extractor: IExtractor, width: int, height: int) -> None:
def __init__(
self,
extractor: IExtractor,
*,
width: Optional[int] = None,
height: Optional[int] = None,
scale_x: Optional[float] = None,
scale_y: Optional[float] = None,
) -> None:
super().__init__(extractor)

assert width > 0 and height > 0
width = width or 0
height = height or 0
scale_x = scale_x or 0
scale_y = scale_y or 0
assert (width > 0 and height > 0) ^ (
scale_x > 0 and scale_y > 0
), "width, height, scale_x, scale_y cannot be used together"

self._width = width
self._height = height
self._scale_x = scale_x
self._scale_y = scale_y

@staticmethod
def _lazy_resize_image(image, new_size):
Expand Down Expand Up @@ -975,17 +1000,34 @@ def _resize_image():

return _resize_image

@staticmethod
def _lazy_rle_encode(
    lazy_mask: Callable[[], mask_tools.BinaryMask]
) -> Callable[[], mask_tools.CompressedRle]:
    """
    Wraps a deferred mask producer into a deferred RLE producer.

    Nothing is computed when this function is called: the returned callable
    invokes ``lazy_mask`` only when it is itself invoked, then encodes the
    resulting mask with ``mask_tools.mask_to_rle`` followed by
    ``mask_tools.to_uncompressed_rle``, passing the mask's own width and height.

    NOTE(review): the return annotation says ``CompressedRle`` while the helper
    called is ``to_uncompressed_rle`` - confirm which RLE flavor is produced.
    """

    def _encode_on_demand():
        binary_mask = lazy_mask()
        # The first two dimensions of the mask are (height, width)
        height, width = binary_mask.shape[:2]
        encoded = mask_tools.mask_to_rle(binary_mask)
        return mask_tools.to_uncompressed_rle(encoded, width=width, height=height)

    return _encode_on_demand

def transform_item(self, item):
if not isinstance(item.media, Image):
raise DatumaroError(
"Item %s: image info is required for this " "transform" % (item.id,)
)
raise DatumaroError("Item %s: image info is required for this transform" % (item.id,))

h, w = item.media.size
xscale = self._width / float(w)
yscale = self._height / float(h)
if self._width and self._height:
xscale = self._width / float(w)
yscale = self._height / float(h)
new_size = (self._height, self._width)
elif self._scale_x and self._scale_y:
xscale = self._scale_x
yscale = self._scale_y
new_size = (round(h * self._scale_y), round(w * self._scale_x))
else:
assert False, "Unexpected scale configuration"

new_size = (self._height, self._width)
new_size = tuple(map(int, new_size))

resized_image = None
if item.media.has_data:
Expand Down Expand Up @@ -1013,8 +1055,11 @@ def transform_item(self, item):
)
)
elif isinstance(ann, Mask):
rescaled_mask = self._lazy_resize_mask(ann, new_size)
resized_annotations.append(ann.wrap(image=rescaled_mask))
lazy_mask = self._lazy_resize_mask(ann, new_size)
if isinstance(ann, RleMask):
resized_annotations.append(ann.wrap(rle=self._lazy_rle_encode(lazy_mask)))
else:
resized_annotations.append(ann.wrap(image=lazy_mask))
elif isinstance(ann, (Caption, Label)):
resized_annotations.append(ann)
else:
Expand Down
75 changes: 67 additions & 8 deletions tests/test_transforms.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
PointsCategories,
Polygon,
PolyLine,
RleMask,
)
from datumaro.components.dataset import Dataset
from datumaro.components.errors import DatumaroError
Expand Down Expand Up @@ -797,7 +798,7 @@ def test_bboxes_values_decrement_transform(self):
@mark_requirement(Requirements.DATUM_GENERAL_REQ)
@mark_bug(Requirements.DATUM_BUG_618)
def test_can_resize(self):
small_dataset = Dataset.from_iterable(
small_frame_dataset = Dataset.from_iterable(
[
DatasetItem(
id=i,
Expand All @@ -816,7 +817,25 @@ def test_can_resize(self):
[0, 1, 1, 0],
[1, 1, 0, 0],
]
)
),
label=1,
),
RleMask(
rle=mask_tools.to_uncompressed_rle(
mask_tools.mask_to_rle(
np.array(
[
[1, 1, 1, 0],
[1, 1, 0, 1],
[0, 1, 0, 1],
[1, 0, 0, 1],
]
)
),
width=4,
height=4,
),
label=2,
),
],
)
Expand All @@ -825,7 +844,7 @@ def test_can_resize(self):
categories=["a", "b", "c"],
)

big_dataset = Dataset.from_iterable(
big_frame_dataset = Dataset.from_iterable(
[
DatasetItem(
id=i,
Expand All @@ -848,7 +867,23 @@ def test_can_resize(self):
[1, 1, 1, 1, 0, 0, 0, 0],
[1, 1, 1, 1, 0, 0, 0, 0],
]
)
),
label=1,
),
Mask(
np.array(
[
[1, 1, 1, 1, 1, 1, 0, 0],
[1, 1, 1, 1, 1, 1, 0, 0],
[1, 1, 1, 1, 0, 0, 1, 1],
[1, 1, 1, 1, 0, 0, 1, 1],
[0, 0, 1, 1, 0, 0, 1, 1],
[0, 0, 1, 1, 0, 0, 1, 1],
[1, 1, 0, 0, 0, 0, 1, 1],
[1, 1, 0, 0, 0, 0, 1, 1],
]
),
label=2,
),
],
)
Expand All @@ -858,12 +893,36 @@ def test_can_resize(self):
)

with self.subTest("upscale"):
actual = transforms.ResizeTransform(small_dataset, width=8, height=8)
compare_datasets(self, big_dataset, actual)
for params in [{"width": 8, "height": 8}, {"scale_x": 2, "scale_y": 2}]:
with self.subTest(params=params):
actual = transforms.ResizeTransform(small_frame_dataset, **params)
compare_datasets(self, big_frame_dataset, actual)

with self.subTest("downscale"):
actual = transforms.ResizeTransform(big_dataset, width=4, height=4)
compare_datasets(self, small_dataset, actual)
for params in [{"width": 4, "height": 4}, {"scale_x": 0.5, "scale_y": 0.5}]:
with self.subTest(params=params):
actual = transforms.ResizeTransform(big_frame_dataset, **params)
compare_datasets(self, small_frame_dataset, actual)

def test_can_use_only_1_set_of_resize_parameters(self):
    # Two ways to request the same resize of a 10x2 (HxW) image to 2x6:
    # an absolute destination size and per-axis scale factors.
    size_params = {"width": 6, "height": 2}
    factor_params = {"scale_x": 3, "scale_y": 0.2}

    source = Dataset.from_iterable([DatasetItem(id=1, media=Image(np.ones((10, 2))))])
    expected = Dataset.from_iterable([DatasetItem(id=1, media=Image(np.ones((2, 6))))])

    # Each parameter set must be accepted on its own and give the same result
    for params in (size_params, factor_params):
        with self.subTest(params=params):
            actual = transforms.ResizeTransform(source, **params)
            compare_datasets(self, expected, actual)

    # Passing both sets at once must be rejected
    params = {**size_params, **factor_params}
    with self.subTest(params=params), self.assertRaisesRegex(
        Exception, "cannot be used together"
    ):
        transforms.ResizeTransform(source, **params)

@mark_bug(Requirements.DATUM_BUG_606)
def test_can_keep_image_ext_on_resize(self):
Expand Down

0 comments on commit 5b8ef82

Please sign in to comment.