diff --git a/test/common_utils.py b/test/common_utils.py
index 3de113b533c..abefd07c43d 100644
--- a/test/common_utils.py
+++ b/test/common_utils.py
@@ -7,9 +7,11 @@
 import os
 import pathlib
 import random
+import re
 import shutil
 import sys
 import tempfile
+import warnings
 from collections import defaultdict
 from subprocess import CalledProcessError, check_output, STDOUT
 from typing import Callable, Sequence, Tuple, Union
@@ -880,3 +882,23 @@ def assert_run_python_script(source_code):
         raise RuntimeError(f"script errored with output:\n{e.output.decode()}")
     if out != b"":
         raise AssertionError(out.decode())
+
+
+@contextlib.contextmanager
+def assert_no_warnings():
+    # The name `catch_warnings` is a misnomer as the context manager does **not** catch any warnings, but rather scopes
+    # the warning filters. All changes made to the filters while in this context will be reset upon exit.
+    with warnings.catch_warnings():
+        warnings.simplefilter("error")
+        yield
+
+
+@contextlib.contextmanager
+def ignore_jit_no_profile_information_warning():
+    # Calling a scripted object often triggers a warning like
+    # `UserWarning: operator() profile_node %$INT1 : int[] = prim::profile_ivalue($INT2) does not have profile information`
+    # with varying `INT1` and `INT2`. Since these are uninteresting for us and only clutter the test summary, we ignore
+    # them.
+    with warnings.catch_warnings():
+        warnings.filterwarnings("ignore", message=re.escape("operator() profile_node %"), category=UserWarning)
+        yield
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
index 71df9ad72d8..935d25edd6d 100644
--- a/test/test_transforms_v2.py
+++ b/test/test_transforms_v2.py
@@ -1711,8 +1711,6 @@ def test_antialias_warning():
     tensor_video = torch.randint(0, 256, size=(2, 3, 10, 10), dtype=torch.uint8)
 
     match = "The default value of the antialias parameter"
-    with pytest.warns(UserWarning, match=match):
-        transforms.Resize((20, 20))(tensor_img)
     with pytest.warns(UserWarning, match=match):
         transforms.RandomResizedCrop((20, 20))(tensor_img)
     with pytest.warns(UserWarning, match=match):
@@ -1722,18 +1720,6 @@
     with pytest.warns(UserWarning, match=match):
         transforms.RandomResize(10, 20)(tensor_img)
 
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize(tensor_img, (20, 20))
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize_image_tensor(tensor_img, (20, 20))
-
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize(tensor_video, (20, 20))
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize_video(tensor_video, (20, 20))
-
-    with pytest.warns(UserWarning, match=match):
-        datapoints.Image(tensor_img).resize((20, 20))
     with pytest.warns(UserWarning, match=match):
         datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20))
@@ -1744,27 +1730,17 @@
     with warnings.catch_warnings():
         warnings.simplefilter("error")
-        transforms.Resize((20, 20))(pil_img)
         transforms.RandomResizedCrop((20, 20))(pil_img)
         transforms.ScaleJitter((20, 20))(pil_img)
         transforms.RandomShortestSize((20, 20))(pil_img)
         transforms.RandomResize(10, 20)(pil_img)
-        transforms.functional.resize(pil_img, (20, 20))
 
-        transforms.Resize((20, 20), antialias=True)(tensor_img)
         transforms.RandomResizedCrop((20, 20), antialias=True)(tensor_img)
         transforms.ScaleJitter((20, 20), antialias=True)(tensor_img)
         transforms.RandomShortestSize((20, 20), antialias=True)(tensor_img)
         transforms.RandomResize(10, 20, antialias=True)(tensor_img)
 
-        transforms.functional.resize(tensor_img, (20, 20), antialias=True)
-        transforms.functional.resize_image_tensor(tensor_img, (20, 20), antialias=True)
-        transforms.functional.resize(tensor_video, (20, 20), antialias=True)
-        transforms.functional.resize_video(tensor_video, (20, 20), antialias=True)
-
-        datapoints.Image(tensor_img).resize((20, 20), antialias=True)
         datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
 
-        datapoints.Video(tensor_video).resize((20, 20), antialias=True)
         datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py
new file mode 100644
index 00000000000..2b9565c74c8
--- /dev/null
+++ b/test/test_transforms_v2_refactored.py
@@ -0,0 +1,721 @@
+import contextlib
+import inspect
+import re
+from typing import get_type_hints
+from unittest import mock
+
+import numpy as np
+import PIL.Image
+import pytest
+
+import torch
+import torchvision.transforms.v2 as transforms
+from common_utils import (
+    assert_equal,
+    assert_no_warnings,
+    cache,
+    cpu_and_cuda,
+    ignore_jit_no_profile_information_warning,
+    make_bounding_box,
+    make_detection_mask,
+    make_image,
+    make_segmentation_mask,
+    make_video,
+)
+from torch.testing import assert_close
+from torchvision import datapoints
+from torchvision.transforms.functional import pil_modes_mapping
+from torchvision.transforms.v2 import functional as F
+
+
+def _to_tolerances(maybe_tolerance_dict):
+    if not isinstance(maybe_tolerance_dict, dict):
+        return dict(rtol=None, atol=None)
+
+    tolerances = dict(rtol=0, atol=0)
+    tolerances.update(maybe_tolerance_dict)
+    return tolerances
+
+
+def _check_kernel_cuda_vs_cpu(kernel, input, *args, rtol, atol, **kwargs):
+    """Checks if the kernel produces close results for inputs on GPU and CPU."""
+    if input.device.type != "cuda":
+        return
+
+    input_cuda = input.as_subclass(torch.Tensor)
+    input_cpu = input_cuda.to("cpu")
+
+    actual = kernel(input_cuda, *args, **kwargs)
+    expected = kernel(input_cpu, *args, **kwargs)
+
+    assert_close(actual, expected, check_device=False, rtol=rtol, atol=atol)
+
+
+@cache
+def _script(fn):
+    try:
+        return torch.jit.script(fn)
+    except Exception as error:
+        raise AssertionError(f"Trying to `torch.jit.script` '{fn.__name__}' raised the error above.") from error
+
+
+def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs):
+    """Checks if the kernel is scriptable and if the scripted output is close to the eager one."""
+    if input.device.type != "cpu":
+        return
+
+    kernel_scripted = _script(kernel)
+
+    input = input.as_subclass(torch.Tensor)
+    with ignore_jit_no_profile_information_warning():
+        actual = kernel_scripted(input, *args, **kwargs)
+    expected = kernel(input, *args, **kwargs)
+
+    assert_close(actual, expected, rtol=rtol, atol=atol)
+
+
+def _check_kernel_batched_vs_unbatched(kernel, input, *args, rtol, atol, **kwargs):
+    """Checks if the kernel produces close results for batched and unbatched inputs."""
+    unbatched_input = input.as_subclass(torch.Tensor)
+
+    for batch_dims in [(2,), (2, 1)]:
+        repeats = [*batch_dims, *[1] * input.ndim]
+
+        actual = kernel(unbatched_input.repeat(repeats), *args, **kwargs)
+
+        expected = kernel(unbatched_input, *args, **kwargs)
+        # We can't directly call `.repeat()` on the output, since some kernels also return additional metadata
+        if isinstance(expected, torch.Tensor):
+            expected = expected.repeat(repeats)
+        else:
+            tensor, 
*metadata = expected + expected = (tensor.repeat(repeats), *metadata) + + assert_close(actual, expected, rtol=rtol, atol=atol) + + for degenerate_batch_dims in [(0,), (5, 0), (0, 5)]: + degenerate_batched_input = torch.empty( + degenerate_batch_dims + input.shape, dtype=input.dtype, device=input.device + ) + + output = kernel(degenerate_batched_input, *args, **kwargs) + # Most kernels just return a tensor, but some also return some additional metadata + if not isinstance(output, torch.Tensor): + output, *_ = output + + assert output.shape[: -input.ndim] == degenerate_batch_dims + + +def check_kernel( + kernel, + input, + *args, + check_cuda_vs_cpu=True, + check_scripted_vs_eager=True, + check_batched_vs_unbatched=True, + **kwargs, +): + initial_input_version = input._version + + output = kernel(input.as_subclass(torch.Tensor), *args, **kwargs) + # Most kernels just return a tensor, but some also return some additional metadata + if not isinstance(output, torch.Tensor): + output, *_ = output + + # check that no inplace operation happened + assert input._version == initial_input_version + + assert output.dtype == input.dtype + assert output.device == input.device + + if check_cuda_vs_cpu: + _check_kernel_cuda_vs_cpu(kernel, input, *args, **kwargs, **_to_tolerances(check_cuda_vs_cpu)) + + if check_scripted_vs_eager: + _check_kernel_scripted_vs_eager(kernel, input, *args, **kwargs, **_to_tolerances(check_scripted_vs_eager)) + + if check_batched_vs_unbatched: + _check_kernel_batched_vs_unbatched(kernel, input, *args, **kwargs, **_to_tolerances(check_batched_vs_unbatched)) + + +def _check_dispatcher_scripted_smoke(dispatcher, input, *args, **kwargs): + """Checks if the dispatcher can be scripted and the scripted version can be called without error.""" + if not isinstance(input, datapoints.Image): + return + + dispatcher_scripted = _script(dispatcher) + with ignore_jit_no_profile_information_warning(): + dispatcher_scripted(input.as_subclass(torch.Tensor), *args, **kwargs) + + +def _check_dispatcher_dispatch(dispatcher, kernel, input, *args, **kwargs): + """Checks if the dispatcher correctly dispatches the input to the corresponding kernel and that the input type is + preserved in doing so. For bounding boxes also checks that the format is preserved. + """ + if isinstance(input, datapoints._datapoint.Datapoint): + # Due to our complex dispatch architecture for datapoints, we cannot spy on the kernel directly, + # but rather have to patch the `Datapoint.__F` attribute to contain the spied on kernel. + spy = mock.MagicMock(wraps=kernel) + with mock.patch.object(F, kernel.__name__, spy): + # Due to Python's name mangling, the `Datapoint.__F` attribute is only accessible from inside the class. 
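+            # (Outside the class, `__F` is only reachable under the mangled name `_Datapoint__F`.)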
+ # Since that is not the case here, we need to prefix f"_{cls.__name__}" + # See https://docs.python.org/3/tutorial/classes.html#private-variables for details + with mock.patch.object(datapoints._datapoint.Datapoint, "_Datapoint__F", new=F): + output = dispatcher(input, *args, **kwargs) + + spy.assert_called_once() + else: + with mock.patch(f"{dispatcher.__module__}.{kernel.__name__}", wraps=kernel) as spy: + output = dispatcher(input, *args, **kwargs) + + spy.assert_called_once() + + assert isinstance(output, type(input)) + + if isinstance(input, datapoints.BoundingBox): + assert output.format == input.format + + +def check_dispatcher( + dispatcher, + kernel, + input, + *args, + check_scripted_smoke=True, + check_dispatch=True, + **kwargs, +): + with mock.patch("torch._C._log_api_usage_once", wraps=torch._C._log_api_usage_once) as spy: + dispatcher(input, *args, **kwargs) + + spy.assert_any_call(f"{dispatcher.__module__}.{dispatcher.__name__}") + + unknown_input = object() + with pytest.raises(TypeError, match=re.escape(str(type(unknown_input)))): + dispatcher(unknown_input, *args, **kwargs) + + if check_scripted_smoke: + _check_dispatcher_scripted_smoke(dispatcher, input, *args, **kwargs) + + if check_dispatch: + _check_dispatcher_dispatch(dispatcher, kernel, input, *args, **kwargs) + + +def _check_dispatcher_kernel_signature_match(dispatcher, *, kernel, input_type): + """Checks if the signature of the dispatcher matches the kernel signature.""" + dispatcher_signature = inspect.signature(dispatcher) + dispatcher_params = list(dispatcher_signature.parameters.values())[1:] + + kernel_signature = inspect.signature(kernel) + kernel_params = list(kernel_signature.parameters.values())[1:] + + if issubclass(input_type, datapoints._datapoint.Datapoint): + # We filter out metadata that is implicitly passed to the dispatcher through the input datapoint, but has to be + # explicitly passed to the kernel. + kernel_params = [param for param in kernel_params if param.name not in input_type.__annotations__.keys()] + + dispatcher_params = iter(dispatcher_params) + for dispatcher_param, kernel_param in zip(dispatcher_params, kernel_params): + try: + # In general, the dispatcher parameters are a superset of the kernel parameters. Thus, we filter out + # dispatcher parameters that have no kernel equivalent while keeping the order intact. + while dispatcher_param.name != kernel_param.name: + dispatcher_param = next(dispatcher_params) + except StopIteration: + raise AssertionError( + f"Parameter `{kernel_param.name}` of kernel `{kernel.__name__}` " + f"has no corresponding parameter on the dispatcher `{dispatcher.__name__}`." + ) from None + + if issubclass(input_type, PIL.Image.Image): + # PIL kernels often have more correct annotations, since they are not limited by JIT. Thus, we don't check + # them in the first place. 
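+            # Resetting both annotations to `inspect.Parameter.empty` makes the equality check below ignore them.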
+ dispatcher_param._annotation = kernel_param._annotation = inspect.Parameter.empty + + assert dispatcher_param == kernel_param + + +def _check_dispatcher_datapoint_signature_match(dispatcher): + """Checks if the signature of the dispatcher matches the corresponding method signature on the Datapoint class.""" + dispatcher_signature = inspect.signature(dispatcher) + dispatcher_params = list(dispatcher_signature.parameters.values())[1:] + + datapoint_method = getattr(datapoints._datapoint.Datapoint, dispatcher.__name__) + datapoint_signature = inspect.signature(datapoint_method) + datapoint_params = list(datapoint_signature.parameters.values())[1:] + + # Some annotations in the `datapoints._datapoint` module + # are stored as strings. The block below makes them concrete again (non-strings), so they can be compared to the + # natively concrete dispatcher annotations. + datapoint_annotations = get_type_hints(datapoint_method) + for param in datapoint_params: + param._annotation = datapoint_annotations[param.name] + + assert dispatcher_params == datapoint_params + + +def check_dispatcher_signatures_match(dispatcher, *, kernel, input_type): + _check_dispatcher_kernel_signature_match(dispatcher, kernel=kernel, input_type=input_type) + _check_dispatcher_datapoint_signature_match(dispatcher) + + +def _check_transform_v1_compatibility(transform, input): + """If the transform defines the ``_v1_transform_cls`` attribute, checks if the transform has a public, static + ``get_params`` method, is scriptable, and the scripted version can be called without error.""" + if not hasattr(transform, "_v1_transform_cls"): + return + + if type(input) is not torch.Tensor: + return + + if hasattr(transform._v1_transform_cls, "get_params"): + assert type(transform).get_params is transform._v1_transform_cls.get_params + + scripted_transform = _script(transform) + with ignore_jit_no_profile_information_warning(): + scripted_transform(input) + + +def check_transform(transform_cls, input, *args, **kwargs): + transform = transform_cls(*args, **kwargs) + + output = transform(input) + assert isinstance(output, type(input)) + + if isinstance(input, datapoints.BoundingBox): + assert output.format == input.format + + _check_transform_v1_compatibility(transform, input) + + +def transform_cls_to_functional(transform_cls): + def wrapper(input, *args, **kwargs): + transform = transform_cls(*args, **kwargs) + return transform(input) + + wrapper.__name__ = transform_cls.__name__ + + return wrapper + + +# We cannot use `list(transforms.InterpolationMode)` here, since it includes some PIL-only ones as well +INTERPOLATION_MODES = [ + transforms.InterpolationMode.NEAREST, + transforms.InterpolationMode.NEAREST_EXACT, + transforms.InterpolationMode.BILINEAR, + transforms.InterpolationMode.BICUBIC, +] + + +@contextlib.contextmanager +def assert_warns_antialias_default_value(): + with pytest.warns(UserWarning, match="The default value of the antialias parameter of all the resizing transforms"): + yield + + +def reference_affine_bounding_box_helper(bounding_box, *, format, spatial_size, affine_matrix): + def transform(bbox, affine_matrix_, format_, spatial_size_): + # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1 + in_dtype = bbox.dtype + if not torch.is_floating_point(bbox): + bbox = bbox.float() + bbox_xyxy = F.convert_format_bounding_box( + bbox.as_subclass(torch.Tensor), + old_format=format_, + new_format=datapoints.BoundingBoxFormat.XYXY, + inplace=True, + ) + points = np.array( + [ + 
[bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
+                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
+                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
+            ]
+        )
+        transformed_points = np.matmul(points, affine_matrix_.T)
+        out_bbox = torch.tensor(
+            [
+                np.min(transformed_points[:, 0]).item(),
+                np.min(transformed_points[:, 1]).item(),
+                np.max(transformed_points[:, 0]).item(),
+                np.max(transformed_points[:, 1]).item(),
+            ],
+            dtype=bbox_xyxy.dtype,
+        )
+        out_bbox = F.convert_format_bounding_box(
+            out_bbox, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format_, inplace=True
+        )
+        # It is important to clamp before casting, especially for CXCYWH format, dtype=int64
+        out_bbox = F.clamp_bounding_box(out_bbox, format=format_, spatial_size=spatial_size_)
+        out_bbox = out_bbox.to(dtype=in_dtype)
+        return out_bbox
+
+    if bounding_box.ndim < 2:
+        bounding_box = [bounding_box]
+
+    expected_bboxes = [transform(bbox, affine_matrix, format, spatial_size) for bbox in bounding_box]
+    if len(expected_bboxes) > 1:
+        expected_bboxes = torch.stack(expected_bboxes)
+    else:
+        expected_bboxes = expected_bboxes[0]
+
+    return expected_bboxes
+
+
+class TestResize:
+    INPUT_SIZE = (17, 11)
+    OUTPUT_SIZES = [17, [17], (17,), [12, 13], (12, 13)]
+
+    def _make_max_size_kwarg(self, *, use_max_size, size):
+        if use_max_size:
+            if not (isinstance(size, int) or len(size) == 1):
+                # This would result in a `ValueError`
+                return None
+
+            max_size = (size if isinstance(size, int) else size[0]) + 1
+        else:
+            max_size = None
+
+        return dict(max_size=max_size)
+
+    def _make_input(self, input_type, *, dtype=None, device="cpu", **kwargs):
+        if input_type in {torch.Tensor, PIL.Image.Image, datapoints.Image}:
+            input = make_image(size=self.INPUT_SIZE, dtype=dtype or torch.uint8, device=device, **kwargs)
+            if input_type is torch.Tensor:
+                input = input.as_subclass(torch.Tensor)
+            elif input_type is PIL.Image.Image:
+                input = F.to_image_pil(input)
+        elif input_type is datapoints.BoundingBox:
+            kwargs.setdefault("format", datapoints.BoundingBoxFormat.XYXY)
+            input = make_bounding_box(
+                spatial_size=self.INPUT_SIZE,
+                dtype=dtype or torch.float32,
+                device=device,
+                **kwargs,
+            )
+        elif input_type is datapoints.Mask:
+            input = make_segmentation_mask(size=self.INPUT_SIZE, dtype=dtype or torch.uint8, device=device, **kwargs)
+        elif input_type is datapoints.Video:
+            input = make_video(size=self.INPUT_SIZE, dtype=dtype or torch.uint8, device=device, **kwargs)
+
+        return input
+
+    def _compute_output_size(self, *, input_size, size, max_size):
+        if not (isinstance(size, int) or len(size) == 1):
+            return tuple(size)
+
+        if not isinstance(size, int):
+            size = size[0]
+
+        old_height, old_width = input_size
+        ratio = old_width / old_height
+        if ratio > 1:
+            new_height = size
+            new_width = int(ratio * new_height)
+        else:
+            new_width = size
+            new_height = int(new_width / ratio)
+
+        if max_size is not None and max(new_height, new_width) > max_size:
+            # Need to recompute the aspect ratio, since it might have changed due to rounding
+            ratio = new_width / new_height
+            if ratio > 1:
+                new_width = max_size
+                new_height = int(new_width / ratio)
+            else:
+                new_height = max_size
+                new_width = int(new_height * ratio)
+
+        return new_height, new_width
+
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES)
+    @pytest.mark.parametrize("use_max_size", [True, False])
+    @pytest.mark.parametrize("antialias", [True, False])
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_image_tensor(self, size, interpolation, use_max_size, antialias, dtype, device):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return
+
+        # In contrast to CPU, there is no native `InterpolationMode.BICUBIC` implementation for uint8 images on CUDA.
+        # Internally, it uses the float path. Thus, we need to test with an enormous tolerance here to account for that.
+        atol = 30 if interpolation is transforms.InterpolationMode.BICUBIC and dtype is torch.uint8 else 1
+        check_cuda_vs_cpu_tolerances = dict(rtol=0, atol=atol / 255 if dtype.is_floating_point else atol)
+
+        check_kernel(
+            F.resize_image_tensor,
+            self._make_input(datapoints.Image, dtype=dtype, device=device),
+            size=size,
+            interpolation=interpolation,
+            **max_size_kwarg,
+            antialias=antialias,
+            check_cuda_vs_cpu=check_cuda_vs_cpu_tolerances,
+            check_scripted_vs_eager=not isinstance(size, int),
+        )
+
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("use_max_size", [True, False])
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_bounding_box(self, format, size, use_max_size, dtype, device):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return
+
+        bounding_box = self._make_input(datapoints.BoundingBox, dtype=dtype, device=device, format=format)
+        check_kernel(
+            F.resize_bounding_box,
+            bounding_box,
+            spatial_size=bounding_box.spatial_size,
+            size=size,
+            **max_size_kwarg,
+            check_scripted_vs_eager=not isinstance(size, int),
+        )
+
+    @pytest.mark.parametrize(
+        "dtype_and_make_mask", [(torch.uint8, make_segmentation_mask), (torch.bool, make_detection_mask)]
+    )
+    def test_kernel_mask(self, dtype_and_make_mask):
+        dtype, make_mask = dtype_and_make_mask
+        check_kernel(F.resize_mask, make_mask(dtype=dtype), size=self.OUTPUT_SIZES[-1])
+
+    def test_kernel_video(self):
+        check_kernel(F.resize_video, self._make_input(datapoints.Video), size=self.OUTPUT_SIZES[-1], antialias=True)
+
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize(
+        "input_type_and_kernel",
+        [
+            (torch.Tensor, F.resize_image_tensor),
+            (PIL.Image.Image, F.resize_image_pil),
+            (datapoints.Image, F.resize_image_tensor),
+            (datapoints.BoundingBox, F.resize_bounding_box),
+            (datapoints.Mask, F.resize_mask),
+            (datapoints.Video, F.resize_video),
+        ],
+    )
+    def test_dispatcher(self, size, input_type_and_kernel):
+        input_type, kernel = input_type_and_kernel
+        check_dispatcher(
+            F.resize,
+            kernel,
+            self._make_input(input_type),
+            size=size,
+            antialias=True,
+            check_scripted_smoke=not isinstance(size, int),
+        )
+
+    @pytest.mark.parametrize(
+        ("input_type", "kernel"),
+        [
+            (torch.Tensor, F.resize_image_tensor),
+            (PIL.Image.Image, F.resize_image_pil),
+            (datapoints.Image, F.resize_image_tensor),
+            (datapoints.BoundingBox, F.resize_bounding_box),
+            (datapoints.Mask, F.resize_mask),
+            (datapoints.Video, F.resize_video),
+        ],
+    )
+    def test_dispatcher_signature(self, kernel, input_type):
+        check_dispatcher_signatures_match(F.resize, kernel=kernel, input_type=input_type)
+
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    @pytest.mark.parametrize(
+        "input_type",
+        [torch.Tensor, PIL.Image.Image, 
datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + ) + def test_transform(self, size, device, input_type): + input = self._make_input(input_type, device=device) + + check_transform( + transforms.Resize, + input, + size=size, + antialias=True, + ) + + def _check_output_size(self, input, output, *, size, max_size): + assert tuple(F.get_spatial_size(output)) == self._compute_output_size( + input_size=F.get_spatial_size(input), size=size, max_size=max_size + ) + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + # `InterpolationMode.NEAREST` is modeled after the buggy `INTER_NEAREST` interpolation of CV2. + # The PIL equivalent of `InterpolationMode.NEAREST` is `InterpolationMode.NEAREST_EXACT` + @pytest.mark.parametrize("interpolation", set(INTERPOLATION_MODES) - {transforms.InterpolationMode.NEAREST}) + @pytest.mark.parametrize("use_max_size", [True, False]) + @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)]) + def test_image_correctness(self, size, interpolation, use_max_size, fn): + if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): + return + + image = self._make_input(torch.Tensor, dtype=torch.uint8, device="cpu") + + actual = fn(image, size=size, interpolation=interpolation, **max_size_kwarg, antialias=True) + expected = F.to_image_tensor( + F.resize(F.to_image_pil(image), size=size, interpolation=interpolation, **max_size_kwarg) + ) + + self._check_output_size(image, actual, size=size, **max_size_kwarg) + torch.testing.assert_close(actual, expected, atol=1, rtol=0) + + def _reference_resize_bounding_box(self, bounding_box, *, size, max_size=None): + old_height, old_width = bounding_box.spatial_size + new_height, new_width = self._compute_output_size( + input_size=bounding_box.spatial_size, size=size, max_size=max_size + ) + + if (old_height, old_width) == (new_height, new_width): + return bounding_box + + affine_matrix = np.array( + [ + [new_width / old_width, 0, 0], + [0, new_height / old_height, 0], + ], + dtype="float64" if bounding_box.dtype == torch.float64 else "float32", + ) + + expected_bboxes = reference_affine_bounding_box_helper( + bounding_box, + format=bounding_box.format, + spatial_size=(new_height, new_width), + affine_matrix=affine_matrix, + ) + return datapoints.BoundingBox.wrap_like(bounding_box, expected_bboxes, spatial_size=(new_height, new_width)) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize("use_max_size", [True, False]) + @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)]) + def test_bounding_box_correctness(self, format, size, use_max_size, fn): + if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): + return + + bounding_box = self._make_input(datapoints.BoundingBox) + + actual = fn(bounding_box, size=size, **max_size_kwarg) + expected = self._reference_resize_bounding_box(bounding_box, size=size, **max_size_kwarg) + + self._check_output_size(bounding_box, actual, size=size, **max_size_kwarg) + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize("interpolation", set(transforms.InterpolationMode) - set(INTERPOLATION_MODES)) + @pytest.mark.parametrize( + "input_type", + [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.Video], + ) + def test_pil_interpolation_compat_smoke(self, interpolation, input_type): + input = 
self._make_input(input_type) + + with ( + contextlib.nullcontext() + if isinstance(input, PIL.Image.Image) + # This error is triggered in PyTorch core + else pytest.raises(NotImplementedError, match=f"got {interpolation.value.lower()}") + ): + F.resize( + input, + size=self.OUTPUT_SIZES[0], + interpolation=interpolation, + ) + + def test_dispatcher_pil_antialias_warning(self): + with pytest.warns(UserWarning, match="Anti-alias option is always applied for PIL Image input"): + F.resize(self._make_input(PIL.Image.Image), size=self.OUTPUT_SIZES[0], antialias=False) + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize( + "input_type", + [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + ) + def test_max_size_error(self, size, input_type): + if isinstance(size, int) or len(size) == 1: + max_size = (size if isinstance(size, int) else size[0]) - 1 + match = "must be strictly greater than the requested size" + else: + # value can be anything other than None + max_size = -1 + match = "size should be an int or a sequence of length 1" + + with pytest.raises(ValueError, match=match): + F.resize(self._make_input(input_type), size=size, max_size=max_size, antialias=True) + + @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) + @pytest.mark.parametrize( + "input_type", + [torch.Tensor, datapoints.Image, datapoints.Video], + ) + def test_antialias_warning(self, interpolation, input_type): + with ( + assert_warns_antialias_default_value() + if interpolation in {transforms.InterpolationMode.BILINEAR, transforms.InterpolationMode.BICUBIC} + else assert_no_warnings() + ): + F.resize(self._make_input(input_type), size=self.OUTPUT_SIZES[0], interpolation=interpolation) + + @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) + @pytest.mark.parametrize( + "input_type", + [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.Video], + ) + def test_interpolation_int(self, interpolation, input_type): + # `InterpolationMode.NEAREST_EXACT` has no proper corresponding integer equivalent. Internally, we map it to + # `0` to be the same as `InterpolationMode.NEAREST` for PIL. However, for the tensor backend there is a + # difference and thus we don't test it here. + if issubclass(input_type, torch.Tensor) and interpolation is transforms.InterpolationMode.NEAREST_EXACT: + return + + input = self._make_input(input_type) + + expected = F.resize(input, size=self.OUTPUT_SIZES[0], interpolation=interpolation, antialias=True) + actual = F.resize( + input, size=self.OUTPUT_SIZES[0], interpolation=pil_modes_mapping[interpolation], antialias=True + ) + + assert_equal(actual, expected) + + def test_transform_unknown_size_error(self): + with pytest.raises(ValueError, match="size can either be an integer or a list or tuple of one or two integers"): + transforms.Resize(size=object()) + + @pytest.mark.parametrize( + "size", [min(INPUT_SIZE), [min(INPUT_SIZE)], (min(INPUT_SIZE),), list(INPUT_SIZE), tuple(INPUT_SIZE)] + ) + @pytest.mark.parametrize( + "input_type", + [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + ) + def test_noop(self, size, input_type): + input = self._make_input(input_type) + + output = F.resize(input, size=size, antialias=True) + + # This identity check is not a requirement. It is here to avoid breaking the behavior by accident. If there + # is a good reason to break this, feel free to downgrade to an equality check. 
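+        # (`Tensor.data_ptr()` used below returns the address of the first element, so equal pointers imply the
+        # output shares the input's storage.)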
+ if isinstance(input, datapoints._datapoint.Datapoint): + # We can't test identity directly, since that checks for the identity of the Python object. Since all + # datapoints unwrap before a kernel and wrap again afterwards, the Python object changes. Thus, we check + # that the underlying storage is the same + assert output.data_ptr() == input.data_ptr() + else: + assert output is input + + @pytest.mark.parametrize( + "input_type", + [torch.Tensor, PIL.Image.Image, datapoints.Image, datapoints.BoundingBox, datapoints.Mask, datapoints.Video], + ) + def test_no_regression_5405(self, input_type): + # Checks that `max_size` is not ignored if `size == small_edge_size` + # See https://github.com/pytorch/vision/issues/5405 + + input = self._make_input(input_type) + + size = min(F.get_spatial_size(input)) + max_size = size + 1 + output = F.resize(input, size=size, max_size=max_size, antialias=True) + + assert max(F.get_spatial_size(output)) == max_size diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index 1d9dd025254..cb1bc257e50 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -148,19 +148,6 @@ def fill_sequence_needs_broadcast(args_kwargs): }, pil_kernel_info=PILKernelInfo(F.horizontal_flip_image_pil, kernel_name="horizontal_flip_image_pil"), ), - DispatcherInfo( - F.resize, - kernels={ - datapoints.Image: F.resize_image_tensor, - datapoints.Video: F.resize_video, - datapoints.BoundingBox: F.resize_bounding_box, - datapoints.Mask: F.resize_mask, - }, - pil_kernel_info=PILKernelInfo(F.resize_image_pil), - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), DispatcherInfo( F.affine, kernels={ diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index 7b877fb092d..547e708b726 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -238,179 +238,6 @@ def reference_inputs_flip_bounding_box(): ) -def _get_resize_sizes(spatial_size): - height, width = spatial_size - length = max(spatial_size) - yield length - yield [length] - yield (length,) - new_height = int(height * 0.75) - new_width = int(width * 1.25) - yield [new_height, new_width] - yield height, width - - -def sample_inputs_resize_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32]): - for size in _get_resize_sizes(image_loader.spatial_size): - yield ArgsKwargs(image_loader, size=size) - - for image_loader, interpolation in itertools.product( - make_image_loaders(sizes=["random"], color_spaces=["RGB"]), - [F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR], - ): - yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation) - - yield ArgsKwargs(make_image_loader(size=(11, 17)), size=20, max_size=25) - - -def sample_inputs_resize_image_tensor_bicubic(): - for image_loader, interpolation in itertools.product( - make_image_loaders(sizes=["random"], color_spaces=["RGB"]), [F.InterpolationMode.BICUBIC] - ): - yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation) - - -@pil_reference_wrapper -def reference_resize_image_tensor(*args, **kwargs): - if not kwargs.pop("antialias", False) and kwargs.get("interpolation", F.InterpolationMode.BILINEAR) in { - F.InterpolationMode.BILINEAR, - F.InterpolationMode.BICUBIC, - }: - raise pytest.UsageError("Anti-aliasing is always active in PIL") - return 
F.resize_image_pil(*args, **kwargs) - - -def reference_inputs_resize_image_tensor(): - for image_loader, interpolation in itertools.product( - make_image_loaders_for_interpolation(), - [ - F.InterpolationMode.NEAREST, - F.InterpolationMode.NEAREST_EXACT, - F.InterpolationMode.BILINEAR, - F.InterpolationMode.BICUBIC, - ], - ): - for size in _get_resize_sizes(image_loader.spatial_size): - yield ArgsKwargs( - image_loader, - size=size, - interpolation=interpolation, - antialias=interpolation - in { - F.InterpolationMode.BILINEAR, - F.InterpolationMode.BICUBIC, - }, - ) - - -def sample_inputs_resize_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders(): - for size in _get_resize_sizes(bounding_box_loader.spatial_size): - yield ArgsKwargs(bounding_box_loader, spatial_size=bounding_box_loader.spatial_size, size=size) - - -def sample_inputs_resize_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): - yield ArgsKwargs(mask_loader, size=[min(mask_loader.shape[-2:]) + 1]) - - -def sample_inputs_resize_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): - yield ArgsKwargs(video_loader, size=[min(video_loader.shape[-2:]) + 1]) - - -def reference_resize_bounding_box(bounding_box, *, spatial_size, size, max_size=None): - old_height, old_width = spatial_size - new_height, new_width = F._geometry._compute_resized_output_size(spatial_size, size=size, max_size=max_size) - - if (old_height, old_width) == (new_height, new_width): - return bounding_box, (old_height, old_width) - - affine_matrix = np.array( - [ - [new_width / old_width, 0, 0], - [0, new_height / old_height, 0], - ], - dtype="float64" if bounding_box.dtype == torch.float64 else "float32", - ) - - expected_bboxes = reference_affine_bounding_box_helper( - bounding_box, - format=bounding_box.format, - spatial_size=(new_height, new_width), - affine_matrix=affine_matrix, - ) - return expected_bboxes, (new_height, new_width) - - -def reference_inputs_resize_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders(extra_dims=((), (4,))): - for size in _get_resize_sizes(bounding_box_loader.spatial_size): - yield ArgsKwargs(bounding_box_loader, size=size, spatial_size=bounding_box_loader.spatial_size) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.resize_image_tensor, - sample_inputs_fn=sample_inputs_resize_image_tensor, - reference_fn=reference_resize_image_tensor, - reference_inputs_fn=reference_inputs_resize_image_tensor, - float32_vs_uint8=True, - closeness_kwargs={ - **pil_reference_pixel_difference(10, mae=True), - **cuda_vs_cpu_pixel_difference(), - **float32_vs_uint8_pixel_difference(1, mae=True), - }, - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), - KernelInfo( - F.resize_image_tensor, - sample_inputs_fn=sample_inputs_resize_image_tensor_bicubic, - reference_fn=reference_resize_image_tensor, - reference_inputs_fn=reference_inputs_resize_image_tensor, - float32_vs_uint8=True, - closeness_kwargs={ - **pil_reference_pixel_difference(10, mae=True), - **cuda_vs_cpu_pixel_difference(atol=30), - **float32_vs_uint8_pixel_difference(1, mae=True), - }, - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), - KernelInfo( - F.resize_bounding_box, - sample_inputs_fn=sample_inputs_resize_bounding_box, - reference_fn=reference_resize_bounding_box, - reference_inputs_fn=reference_inputs_resize_bounding_box, - closeness_kwargs={ - (("TestKernels", "test_against_reference"), torch.int64, "cpu"): 
dict(atol=1, rtol=0), - }, - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), - KernelInfo( - F.resize_mask, - sample_inputs_fn=sample_inputs_resize_mask, - closeness_kwargs=pil_reference_pixel_difference(10), - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), - KernelInfo( - F.resize_video, - sample_inputs_fn=sample_inputs_resize_video, - closeness_kwargs=cuda_vs_cpu_pixel_difference(), - ), - ] -) - - _AFFINE_KWARGS = combinations_grid( angle=[-87, 15, 90], translate=[(5, 5), (-5, -5)],