diff --git a/test/common_utils.py b/test/common_utils.py index 9713901bdcf..c815786b586 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -1,7 +1,4 @@ -import collections.abc import contextlib -import dataclasses -import enum import functools import itertools import os @@ -12,12 +9,9 @@ import sys import tempfile import warnings -from collections import defaultdict from subprocess import CalledProcessError, check_output, STDOUT -from typing import Callable, Sequence, Tuple, Union import numpy as np - import PIL.Image import pytest import torch @@ -27,7 +21,7 @@ from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair from torchvision import datapoints, io from torchvision.transforms._functional_tensor import _max_value as get_max_value -from torchvision.transforms.v2.functional import to_dtype_image, to_image, to_pil_image +from torchvision.transforms.v2.functional import to_image, to_pil_image IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"]) @@ -363,132 +357,7 @@ def assert_close( assert_equal = functools.partial(assert_close, rtol=0, atol=0) -def parametrized_error_message(*args, **kwargs): - def to_str(obj): - if isinstance(obj, torch.Tensor) and obj.numel() > 30: - return f"tensor(shape={list(obj.shape)}, dtype={obj.dtype}, device={obj.device})" - elif isinstance(obj, enum.Enum): - return f"{type(obj).__name__}.{obj.name}" - else: - return repr(obj) - - if args or kwargs: - postfix = "\n".join( - [ - "", - "Failure happened for the following parameters:", - "", - *[to_str(arg) for arg in args], - *[f"{name}={to_str(kwarg)}" for name, kwarg in kwargs.items()], - ] - ) - else: - postfix = "" - - def wrapper(msg): - return msg + postfix - - return wrapper - - -class ArgsKwargs: - def __init__(self, *args, **kwargs): - self.args = args - self.kwargs = kwargs - - def __iter__(self): - yield self.args - yield self.kwargs - - def load(self, device="cpu"): - return ArgsKwargs( - *(arg.load(device) if isinstance(arg, TensorLoader) else arg for arg in self.args), - **{ - keyword: arg.load(device) if isinstance(arg, TensorLoader) else arg - for keyword, arg in self.kwargs.items() - }, - ) - - -# new v2 default DEFAULT_SIZE = (17, 11) -# old v2 defaults -DEFAULT_SQUARE_SPATIAL_SIZE = 15 -DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33) -DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9) -DEFAULT_SPATIAL_SIZES = ( - DEFAULT_LANDSCAPE_SPATIAL_SIZE, - DEFAULT_PORTRAIT_SPATIAL_SIZE, - DEFAULT_SQUARE_SPATIAL_SIZE, -) - - -def _parse_size(size, *, name="size"): - if size == "random": - raise ValueError("This should never happen") - elif isinstance(size, int) and size > 0: - return (size, size) - elif ( - isinstance(size, collections.abc.Sequence) - and len(size) == 2 - and all(isinstance(length, int) and length > 0 for length in size) - ): - return tuple(size) - else: - raise pytest.UsageError( - f"'{name}' can either be `'random'`, a positive integer, or a sequence of two positive integers," - f"but got {size} instead." 
- ) - - -VALID_EXTRA_DIMS = ((), (4,), (2, 3)) -DEGENERATE_BATCH_DIMS = ((0,), (5, 0), (0, 5)) - -DEFAULT_EXTRA_DIMS = (*VALID_EXTRA_DIMS, *DEGENERATE_BATCH_DIMS) - - -def from_loader(loader_fn): - def wrapper(*args, **kwargs): - device = kwargs.pop("device", "cpu") - loader = loader_fn(*args, **kwargs) - return loader.load(device) - - return wrapper - - -def from_loaders(loaders_fn): - def wrapper(*args, **kwargs): - device = kwargs.pop("device", "cpu") - loaders = loaders_fn(*args, **kwargs) - for loader in loaders: - yield loader.load(device) - - return wrapper - - -@dataclasses.dataclass -class TensorLoader: - fn: Callable[[Sequence[int], torch.dtype, Union[str, torch.device]], torch.Tensor] - shape: Sequence[int] - dtype: torch.dtype - - def load(self, device): - return self.fn(self.shape, self.dtype, device) - - -@dataclasses.dataclass -class ImageLoader(TensorLoader): - spatial_size: Tuple[int, int] = dataclasses.field(init=False) - num_channels: int = dataclasses.field(init=False) - memory_format: torch.memory_format = torch.contiguous_format - canvas_size: Tuple[int, int] = dataclasses.field(init=False) - - def __post_init__(self): - self.spatial_size = self.canvas_size = self.shape[-2:] - self.num_channels = self.shape[-3] - - def load(self, device): - return self.fn(self.shape, self.dtype, device, memory_format=self.memory_format) NUM_CHANNELS_MAP = { @@ -499,13 +368,6 @@ def load(self, device): } -def get_num_channels(color_space): - num_channels = NUM_CHANNELS_MAP.get(color_space) - if not num_channels: - raise pytest.UsageError(f"Can't determine the number of channels for color space {color_space}") - return num_channels - - def make_image( size=DEFAULT_SIZE, *, @@ -515,10 +377,11 @@ def make_image( device="cpu", memory_format=torch.contiguous_format, ): + num_channels = NUM_CHANNELS_MAP[color_space] dtype = dtype or torch.uint8 max_value = get_max_value(dtype) data = torch.testing.make_tensor( - (*batch_dims, get_num_channels(color_space), *size), + (*batch_dims, num_channels, *size), low=0, high=max_value, dtype=dtype, @@ -539,109 +402,7 @@ def make_image_pil(*args, **kwargs): return to_pil_image(make_image(*args, **kwargs)) -def make_image_loader( - size=DEFAULT_PORTRAIT_SPATIAL_SIZE, - *, - color_space="RGB", - extra_dims=(), - dtype=torch.float32, - constant_alpha=True, - memory_format=torch.contiguous_format, -): - if not constant_alpha: - raise ValueError("This should never happen") - size = _parse_size(size) - num_channels = get_num_channels(color_space) - - def fn(shape, dtype, device, memory_format): - *batch_dims, _, height, width = shape - return make_image( - (height, width), - color_space=color_space, - batch_dims=batch_dims, - dtype=dtype, - device=device, - memory_format=memory_format, - ) - - return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype, memory_format=memory_format) - - -def make_image_loaders( - *, - sizes=DEFAULT_SPATIAL_SIZES, - color_spaces=( - "GRAY", - "GRAY_ALPHA", - "RGB", - "RGBA", - ), - extra_dims=DEFAULT_EXTRA_DIMS, - dtypes=(torch.float32, torch.float64, torch.uint8), - constant_alpha=True, -): - for params in combinations_grid(size=sizes, color_space=color_spaces, extra_dims=extra_dims, dtype=dtypes): - yield make_image_loader(**params, constant_alpha=constant_alpha) - - -make_images = from_loaders(make_image_loaders) - - -def make_image_loader_for_interpolation( - size=(233, 147), *, color_space="RGB", dtype=torch.uint8, memory_format=torch.contiguous_format -): - size = _parse_size(size) - num_channels = 
get_num_channels(color_space) - - def fn(shape, dtype, device, memory_format): - height, width = shape[-2:] - - image_pil = ( - PIL.Image.open(pathlib.Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg") - .resize((width, height)) - .convert( - { - "GRAY": "L", - "GRAY_ALPHA": "LA", - "RGB": "RGB", - "RGBA": "RGBA", - }[color_space] - ) - ) - - image_tensor = to_image(image_pil) - if memory_format == torch.contiguous_format: - image_tensor = image_tensor.to(device=device, memory_format=memory_format, copy=True) - else: - image_tensor = image_tensor.to(device=device) - image_tensor = to_dtype_image(image_tensor, dtype=dtype, scale=True) - - return datapoints.Image(image_tensor) - - return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype, memory_format=memory_format) - - -def make_image_loaders_for_interpolation( - sizes=((233, 147),), - color_spaces=("RGB",), - dtypes=(torch.uint8,), - memory_formats=(torch.contiguous_format, torch.channels_last), -): - for params in combinations_grid(size=sizes, color_space=color_spaces, dtype=dtypes, memory_format=memory_formats): - yield make_image_loader_for_interpolation(**params) - - -@dataclasses.dataclass -class BoundingBoxesLoader(TensorLoader): - format: datapoints.BoundingBoxFormat - spatial_size: Tuple[int, int] - canvas_size: Tuple[int, int] = dataclasses.field(init=False) - - def __post_init__(self): - self.canvas_size = self.spatial_size - - -def make_bounding_box( +def make_bounding_boxes( canvas_size=DEFAULT_SIZE, *, format=datapoints.BoundingBoxFormat.XYXY, @@ -687,42 +448,6 @@ def sample_position(values, max_value): ) -def make_bounding_box_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32): - if isinstance(format, str): - format = datapoints.BoundingBoxFormat[format] - - spatial_size = _parse_size(spatial_size, name="spatial_size") - - def fn(shape, dtype, device): - *batch_dims, num_coordinates = shape - if num_coordinates != 4: - raise pytest.UsageError() - - return make_bounding_box( - format=format, canvas_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device - ) - - return BoundingBoxesLoader(fn, shape=(*extra_dims[-1:], 4), dtype=dtype, format=format, spatial_size=spatial_size) - - -def make_bounding_box_loaders( - *, - extra_dims=tuple(d for d in DEFAULT_EXTRA_DIMS if len(d) < 2), - formats=tuple(datapoints.BoundingBoxFormat), - spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, - dtypes=(torch.float32, torch.float64, torch.int64), -): - for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes): - yield make_bounding_box_loader(**params, spatial_size=spatial_size) - - -make_bounding_boxes = from_loaders(make_bounding_box_loaders) - - -class MaskLoader(TensorLoader): - pass - - def make_detection_mask(size=DEFAULT_SIZE, *, num_objects=5, batch_dims=(), dtype=None, device="cpu"): """Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks""" return datapoints.Mask( @@ -736,32 +461,6 @@ def make_detection_mask(size=DEFAULT_SIZE, *, num_objects=5, batch_dims=(), dtyp ) -def make_detection_mask_loader(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_objects=5, extra_dims=(), dtype=torch.uint8): - # This produces "detection" masks, i.e. 
`(*, N, H, W)`, where `N` denotes the number of objects - size = _parse_size(size) - - def fn(shape, dtype, device): - *batch_dims, num_objects, height, width = shape - return make_detection_mask( - (height, width), num_objects=num_objects, batch_dims=batch_dims, dtype=dtype, device=device - ) - - return MaskLoader(fn, shape=(*extra_dims, num_objects, *size), dtype=dtype) - - -def make_detection_mask_loaders( - sizes=DEFAULT_SPATIAL_SIZES, - num_objects=(1, 0, 5), - extra_dims=DEFAULT_EXTRA_DIMS, - dtypes=(torch.uint8,), -): - for params in combinations_grid(size=sizes, num_objects=num_objects, extra_dims=extra_dims, dtype=dtypes): - yield make_detection_mask_loader(**params) - - -make_detection_masks = from_loaders(make_detection_mask_loaders) - - def make_segmentation_mask(size=DEFAULT_SIZE, *, num_categories=10, batch_dims=(), dtype=None, device="cpu"): """Make a "segmentation" mask, i.e. (*, H, W), where the category is encoded as pixel value""" return datapoints.Mask( @@ -775,56 +474,6 @@ def make_segmentation_mask(size=DEFAULT_SIZE, *, num_categories=10, batch_dims=( ) -def make_segmentation_mask_loader( - size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_categories=10, extra_dims=(), dtype=torch.uint8 -): - # This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values - size = _parse_size(size) - - def fn(shape, dtype, device): - *batch_dims, height, width = shape - return make_segmentation_mask( - (height, width), num_categories=num_categories, batch_dims=batch_dims, dtype=dtype, device=device - ) - - return MaskLoader(fn, shape=(*extra_dims, *size), dtype=dtype) - - -def make_segmentation_mask_loaders( - *, - sizes=DEFAULT_SPATIAL_SIZES, - num_categories=(1, 2, 10), - extra_dims=DEFAULT_EXTRA_DIMS, - dtypes=(torch.uint8,), -): - for params in combinations_grid(size=sizes, num_categories=num_categories, extra_dims=extra_dims, dtype=dtypes): - yield make_segmentation_mask_loader(**params) - - -make_segmentation_masks = from_loaders(make_segmentation_mask_loaders) - - -def make_mask_loaders( - *, - sizes=DEFAULT_SPATIAL_SIZES, - num_objects=(1, 0, 5), - num_categories=(1, 2, 10), - extra_dims=DEFAULT_EXTRA_DIMS, - dtypes=(torch.uint8,), -): - yield from make_detection_mask_loaders(sizes=sizes, num_objects=num_objects, extra_dims=extra_dims, dtypes=dtypes) - yield from make_segmentation_mask_loaders( - sizes=sizes, num_categories=num_categories, extra_dims=extra_dims, dtypes=dtypes - ) - - -make_masks = from_loaders(make_mask_loaders) - - -class VideoLoader(ImageLoader): - pass - - def make_video(size=DEFAULT_SIZE, *, num_frames=3, batch_dims=(), **kwargs): return datapoints.Video(make_image(size, batch_dims=(*batch_dims, num_frames), **kwargs)) @@ -833,120 +482,6 @@ def make_video_tensor(*args, **kwargs): return make_video(*args, **kwargs).as_subclass(torch.Tensor) -def make_video_loader( - size=DEFAULT_PORTRAIT_SPATIAL_SIZE, - *, - color_space="RGB", - num_frames=3, - extra_dims=(), - dtype=torch.uint8, -): - size = _parse_size(size) - - def fn(shape, dtype, device, memory_format): - *batch_dims, num_frames, _, height, width = shape - return make_video( - (height, width), - num_frames=num_frames, - batch_dims=batch_dims, - color_space=color_space, - dtype=dtype, - device=device, - memory_format=memory_format, - ) - - return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype) - - -def make_video_loaders( - *, - sizes=DEFAULT_SPATIAL_SIZES, - color_spaces=( - "GRAY", - "RGB", - ), - num_frames=(1, 0, 3), - 
extra_dims=DEFAULT_EXTRA_DIMS, - dtypes=(torch.uint8, torch.float32, torch.float64), -): - for params in combinations_grid( - size=sizes, color_space=color_spaces, num_frames=num_frames, extra_dims=extra_dims, dtype=dtypes - ): - yield make_video_loader(**params) - - -make_videos = from_loaders(make_video_loaders) - - -class TestMark: - def __init__( - self, - # Tuple of test class name and test function name that identifies the test the mark is applied to. If there is - # no test class, i.e. a standalone test function, use `None`. - test_id, - # `pytest.mark.*` to apply, e.g. `pytest.mark.skip` or `pytest.mark.xfail` - mark, - *, - # Callable, that will be passed an `ArgsKwargs` and should return a boolean to indicate if the mark will be - # applied. If omitted, defaults to always apply. - condition=None, - ): - self.test_id = test_id - self.mark = mark - self.condition = condition or (lambda args_kwargs: True) - - -def mark_framework_limitation(test_id, reason, condition=None): - # The purpose of this function is to have a single entry point for skip marks that are only there, because the test - # framework cannot handle the kernel in general or a specific parameter combination. - # As development progresses, we can change the `mark.skip` to `mark.xfail` from time to time to see if the skip is - # still justified. - # We don't want to use `mark.xfail` all the time, because that actually runs the test until an error happens. Thus, - # we are wasting CI resources for no reason for most of the time - return TestMark(test_id, pytest.mark.skip(reason=reason), condition=condition) - - -class InfoBase: - def __init__( - self, - *, - # Identifier if the info that shows up the parametrization. - id, - # Test markers that will be (conditionally) applied to an `ArgsKwargs` parametrization. - # See the `TestMark` class for details - test_marks=None, - # Additional parameters, e.g. `rtol=1e-3`, passed to `assert_close`. Keys are a 3-tuple of `test_id` (see - # `TestMark`), the dtype, and the device. - closeness_kwargs=None, - ): - self.id = id - - self.test_marks = test_marks or [] - test_marks_map = defaultdict(list) - for test_mark in self.test_marks: - test_marks_map[test_mark.test_id].append(test_mark) - self._test_marks_map = dict(test_marks_map) - - self.closeness_kwargs = closeness_kwargs or dict() - - def get_marks(self, test_id, args_kwargs): - return [ - test_mark.mark for test_mark in self._test_marks_map.get(test_id, []) if test_mark.condition(args_kwargs) - ] - - def get_closeness_kwargs(self, test_id, *, dtype, device): - if not (isinstance(test_id, tuple) and len(test_id) == 2): - msg = "`test_id` should be a `Tuple[Optional[str], str]` denoting the test class and function name" - if callable(test_id): - msg += ". Did you forget to add the `test_id` fixture to parameters of the test?" - else: - msg += f", but got {test_id} instead." - raise pytest.UsageError(msg) - if isinstance(device, torch.device): - device = device.type - return self.closeness_kwargs.get((test_id, dtype, device), dict()) - - def assert_run_python_script(source_code): """Utility to check assertions in an independent Python subprocess. 
diff --git a/test/prototype_common_utils.py b/test/prototype_common_utils.py index 8259246c0cb..acbe1a6a77a 100644 --- a/test/prototype_common_utils.py +++ b/test/prototype_common_utils.py @@ -4,12 +4,12 @@ import pytest import torch - -from common_utils import combinations_grid, DEFAULT_EXTRA_DIMS, from_loader, from_loaders, TensorLoader from torch.nn.functional import one_hot from torchvision.prototype import datapoints +from transforms_v2_legacy_utils import combinations_grid, DEFAULT_EXTRA_DIMS, from_loader, from_loaders, TensorLoader + @dataclasses.dataclass class LabelLoader(TensorLoader): diff --git a/test/test_datapoints.py b/test/test_datapoints.py index 4da2eb39383..1aeb2367752 100644 --- a/test/test_datapoints.py +++ b/test/test_datapoints.py @@ -2,7 +2,7 @@ import pytest import torch -from common_utils import assert_equal, make_bounding_box, make_image, make_segmentation_mask, make_video +from common_utils import assert_equal, make_bounding_boxes, make_image, make_segmentation_mask, make_video from PIL import Image from torchvision import datapoints @@ -68,7 +68,7 @@ def test_new_requires_grad(data, input_requires_grad, expected_requires_grad): assert datapoint.requires_grad is expected_requires_grad -@pytest.mark.parametrize("make_input", [make_image, make_bounding_box, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) def test_isinstance(make_input): assert isinstance(make_input(), torch.Tensor) @@ -80,7 +80,7 @@ def test_wrapping_no_copy(): assert image.data_ptr() == tensor.data_ptr() -@pytest.mark.parametrize("make_input", [make_image, make_bounding_box, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) def test_to_wrapping(make_input): dp = make_input() @@ -90,7 +90,7 @@ def test_to_wrapping(make_input): assert dp_to.dtype is torch.float64 -@pytest.mark.parametrize("make_input", [make_image, make_bounding_box, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) @pytest.mark.parametrize("return_type", ["Tensor", "datapoint"]) def test_to_datapoint_reference(make_input, return_type): tensor = torch.rand((3, 16, 16), dtype=torch.float64) @@ -104,7 +104,7 @@ def test_to_datapoint_reference(make_input, return_type): assert type(tensor) is torch.Tensor -@pytest.mark.parametrize("make_input", [make_image, make_bounding_box, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) @pytest.mark.parametrize("return_type", ["Tensor", "datapoint"]) def test_clone_wrapping(make_input, return_type): dp = make_input() @@ -116,7 +116,7 @@ def test_clone_wrapping(make_input, return_type): assert dp_clone.data_ptr() != dp.data_ptr() -@pytest.mark.parametrize("make_input", [make_image, make_bounding_box, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) @pytest.mark.parametrize("return_type", ["Tensor", "datapoint"]) def test_requires_grad__wrapping(make_input, return_type): dp = make_input(dtype=torch.float) @@ -131,7 +131,7 @@ def test_requires_grad__wrapping(make_input, return_type): assert dp_requires_grad.requires_grad -@pytest.mark.parametrize("make_input", [make_image, make_bounding_box, make_segmentation_mask, 
make_video]) +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) @pytest.mark.parametrize("return_type", ["Tensor", "datapoint"]) def test_detach_wrapping(make_input, return_type): dp = make_input(dtype=torch.float).requires_grad_(True) @@ -170,7 +170,7 @@ def test_force_subclass_with_metadata(return_type): datapoints.set_return_type("tensor") -@pytest.mark.parametrize("make_input", [make_image, make_bounding_box, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) @pytest.mark.parametrize("return_type", ["Tensor", "datapoint"]) def test_other_op_no_wrapping(make_input, return_type): dp = make_input() @@ -182,7 +182,7 @@ def test_other_op_no_wrapping(make_input, return_type): assert type(output) is (type(dp) if return_type == "datapoint" else torch.Tensor) -@pytest.mark.parametrize("make_input", [make_image, make_bounding_box, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) @pytest.mark.parametrize( "op", [ @@ -199,7 +199,7 @@ def test_no_tensor_output_op_no_wrapping(make_input, op): assert type(output) is not type(dp) -@pytest.mark.parametrize("make_input", [make_image, make_bounding_box, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) @pytest.mark.parametrize("return_type", ["Tensor", "datapoint"]) def test_inplace_op_no_wrapping(make_input, return_type): dp = make_input() @@ -212,7 +212,7 @@ def test_inplace_op_no_wrapping(make_input, return_type): assert type(dp) is original_type -@pytest.mark.parametrize("make_input", [make_image, make_bounding_box, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) def test_wrap(make_input): dp = make_input() @@ -225,7 +225,7 @@ def test_wrap(make_input): assert dp_new.data_ptr() == output.data_ptr() -@pytest.mark.parametrize("make_input", [make_image, make_bounding_box, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) @pytest.mark.parametrize("requires_grad", [False, True]) def test_deepcopy(make_input, requires_grad): dp = make_input(dtype=torch.float) @@ -242,7 +242,7 @@ def test_deepcopy(make_input, requires_grad): assert dp_deepcopied.requires_grad is requires_grad -@pytest.mark.parametrize("make_input", [make_image, make_bounding_box, make_segmentation_mask, make_video]) +@pytest.mark.parametrize("make_input", [make_image, make_bounding_boxes, make_segmentation_mask, make_video]) @pytest.mark.parametrize("return_type", ["Tensor", "datapoint"]) @pytest.mark.parametrize( "op", diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py index bf45970df97..0410ecadc48 100644 --- a/test/test_prototype_transforms.py +++ b/test/test_prototype_transforms.py @@ -4,14 +4,7 @@ import pytest import torch -from common_utils import ( - assert_equal, - DEFAULT_EXTRA_DIMS, - make_bounding_box, - make_detection_mask, - make_image, - make_video, -) +from common_utils import assert_equal from prototype_common_utils import make_label @@ -19,6 +12,13 @@ from torchvision.prototype import datapoints, transforms from torchvision.transforms.v2.functional import clamp_bounding_boxes, InterpolationMode, 
pil_to_tensor, to_pil_image from torchvision.transforms.v2.utils import check_type, is_pure_tensor +from transforms_v2_legacy_utils import ( + DEFAULT_EXTRA_DIMS, + make_bounding_boxes, + make_detection_mask, + make_image, + make_video, +) BATCH_EXTRA_DIMS = [extra_dims for extra_dims in DEFAULT_EXTRA_DIMS if extra_dims] @@ -167,7 +167,7 @@ def test__get_params(self, mocker): flat_inputs = [ make_image(size=canvas_size, color_space="RGB"), - make_bounding_box(format=BoundingBoxFormat.XYXY, canvas_size=canvas_size, batch_dims=batch_shape), + make_bounding_boxes(format=BoundingBoxFormat.XYXY, canvas_size=canvas_size, batch_dims=batch_shape), ] params = transform._get_params(flat_inputs) @@ -202,7 +202,7 @@ def test__transform_culling(self, mocker): ), ) - bounding_boxes = make_bounding_box( + bounding_boxes = make_bounding_boxes( format=BoundingBoxFormat.XYXY, canvas_size=canvas_size, batch_dims=(batch_size,) ) masks = make_detection_mask(size=canvas_size, batch_dims=(batch_size,)) @@ -240,7 +240,7 @@ def test__transform_bounding_boxes_clamping(self, mocker): ), ) - bounding_boxes = make_bounding_box( + bounding_boxes = make_bounding_boxes( format=BoundingBoxFormat.XYXY, canvas_size=canvas_size, batch_dims=(batch_size,) ) mock = mocker.patch( @@ -283,7 +283,7 @@ class TestPermuteDimensions: def test_call(self, dims, inverse_dims): sample = dict( image=make_image(), - bounding_boxes=make_bounding_box(format=BoundingBoxFormat.XYXY), + bounding_boxes=make_bounding_boxes(format=BoundingBoxFormat.XYXY), video=make_video(), str="str", int=0, @@ -327,7 +327,7 @@ class TestTransposeDimensions: def test_call(self, dims): sample = dict( image=make_image(), - bounding_boxes=make_bounding_box(format=BoundingBoxFormat.XYXY), + bounding_boxes=make_bounding_boxes(format=BoundingBoxFormat.XYXY), video=make_video(), str="str", int=0, @@ -389,7 +389,7 @@ def make_datapoints(): pil_image = to_pil_image(make_image(size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_boxes(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), } @@ -398,7 +398,7 @@ def make_datapoints(): tensor_image = torch.Tensor(make_image(size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_boxes(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), } @@ -407,7 +407,7 @@ def make_datapoints(): datapoint_image = make_image(size=size, color_space="RGB") target = { - "boxes": make_bounding_box(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_boxes(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long), } diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py index d7a6f21bbe7..5752b323f79 100644 --- a/test/test_transforms_v2.py +++ b/test/test_transforms_v2.py @@ -11,25 +11,23 @@ import torch import torchvision.transforms.v2 
as transforms -from common_utils import ( - assert_equal, - assert_run_python_script, - cpu_and_cuda, - make_bounding_box, +from common_utils import assert_equal, assert_run_python_script, cpu_and_cuda +from torch.utils._pytree import tree_flatten, tree_unflatten +from torchvision import datapoints +from torchvision.ops.boxes import box_iou +from torchvision.transforms.functional import to_pil_image +from torchvision.transforms.v2 import functional as F +from torchvision.transforms.v2.utils import check_type, is_pure_tensor, query_chw +from transforms_v2_legacy_utils import ( make_bounding_boxes, make_detection_mask, make_image, make_images, + make_multiple_bounding_boxes, make_segmentation_mask, make_video, make_videos, ) -from torch.utils._pytree import tree_flatten, tree_unflatten -from torchvision import datapoints -from torchvision.ops.boxes import box_iou -from torchvision.transforms.functional import to_pil_image -from torchvision.transforms.v2 import functional as F -from torchvision.transforms.v2.utils import check_type, is_pure_tensor, query_chw def make_vanilla_tensor_images(*args, **kwargs): @@ -45,7 +43,7 @@ def make_pil_images(*args, **kwargs): def make_vanilla_tensor_bounding_boxes(*args, **kwargs): - for bounding_boxes in make_bounding_boxes(*args, **kwargs): + for bounding_boxes in make_multiple_bounding_boxes(*args, **kwargs): yield bounding_boxes.data @@ -180,13 +178,13 @@ def test_common(self, transform, adapter, container_type, image_or_video, device image_datapoint=make_image(size=canvas_size), video_datapoint=make_video(size=canvas_size), image_pil=next(make_pil_images(sizes=[canvas_size], color_spaces=["RGB"])), - bounding_boxes_xyxy=make_bounding_box( + bounding_boxes_xyxy=make_bounding_boxes( format=datapoints.BoundingBoxFormat.XYXY, canvas_size=canvas_size, batch_dims=(3,) ), - bounding_boxes_xywh=make_bounding_box( + bounding_boxes_xywh=make_bounding_boxes( format=datapoints.BoundingBoxFormat.XYWH, canvas_size=canvas_size, batch_dims=(4,) ), - bounding_boxes_cxcywh=make_bounding_box( + bounding_boxes_cxcywh=make_bounding_boxes( format=datapoints.BoundingBoxFormat.CXCYWH, canvas_size=canvas_size, batch_dims=(5,) ), bounding_boxes_degenerate_xyxy=datapoints.BoundingBoxes( @@ -813,7 +811,7 @@ def test__transform(self, mocker): size = (32, 24) image = make_image(size) - bboxes = make_bounding_box(format="XYXY", canvas_size=size, batch_dims=(6,)) + bboxes = make_bounding_boxes(format="XYXY", canvas_size=size, batch_dims=(6,)) masks = make_detection_mask(size, num_objects=6) sample = [image, bboxes, masks] diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py index 3196a5fd82c..61de769d885 100644 --- a/test/test_transforms_v2_consistency.py +++ b/test/test_transforms_v2_consistency.py @@ -12,17 +12,7 @@ import torch import torchvision.transforms.v2 as v2_transforms -from common_utils import ( - ArgsKwargs, - assert_close, - assert_equal, - make_bounding_box, - make_detection_mask, - make_image, - make_images, - make_segmentation_mask, - set_rng_seed, -) +from common_utils import assert_close, assert_equal, set_rng_seed from torch import nn from torchvision import datapoints, transforms as legacy_transforms from torchvision._utils import sequence_to_str @@ -32,6 +22,14 @@ from torchvision.transforms.v2._utils import _get_fill from torchvision.transforms.v2.functional import to_pil_image from torchvision.transforms.v2.utils import query_size +from transforms_v2_legacy_utils import ( + ArgsKwargs, + make_bounding_boxes, + 
make_detection_mask, + make_image, + make_images, + make_segmentation_mask, +) DEFAULT_MAKE_IMAGES_KWARGS = dict(color_spaces=["RGB"], extra_dims=[(4,)]) @@ -1090,7 +1088,7 @@ def make_label(extra_dims, categories): pil_image = to_pil_image(make_image(size=size, color_space="RGB")) target = { - "boxes": make_bounding_box(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_boxes(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: @@ -1100,7 +1098,7 @@ def make_label(extra_dims, categories): tensor_image = torch.Tensor(make_image(size=size, color_space="RGB", dtype=torch.float32)) target = { - "boxes": make_bounding_box(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_boxes(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: @@ -1110,7 +1108,7 @@ def make_label(extra_dims, categories): datapoint_image = make_image(size=size, color_space="RGB", dtype=torch.float32) target = { - "boxes": make_bounding_box(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), + "boxes": make_bounding_boxes(canvas_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float), "labels": make_label(extra_dims=(num_objects,), categories=80), } if with_mask: diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py index 29ef54d925a..15af5a7a9ed 100644 --- a/test/test_transforms_v2_functional.py +++ b/test/test_transforms_v2_functional.py @@ -8,16 +8,7 @@ import pytest import torch -from common_utils import ( - assert_close, - cache, - cpu_and_cuda, - DEFAULT_SQUARE_SPATIAL_SIZE, - make_bounding_boxes, - needs_cuda, - parametrized_error_message, - set_rng_seed, -) +from common_utils import assert_close, cache, cpu_and_cuda, needs_cuda, set_rng_seed from torch.utils._pytree import tree_map from torchvision import datapoints from torchvision.transforms.functional import _get_perspective_coeffs @@ -27,6 +18,11 @@ from torchvision.transforms.v2.utils import is_pure_tensor from transforms_v2_dispatcher_infos import DISPATCHER_INFOS from transforms_v2_kernel_infos import KERNEL_INFOS +from transforms_v2_legacy_utils import ( + DEFAULT_SQUARE_SPATIAL_SIZE, + make_multiple_bounding_boxes, + parametrized_error_message, +) KERNEL_INFOS_MAP = {info.kernel: info for info in KERNEL_INFOS} @@ -506,7 +502,7 @@ class TestClampBoundingBoxes: ], ) def test_pure_tensor_insufficient_metadata(self, metadata): - pure_tensor = next(make_bounding_boxes()).as_subclass(torch.Tensor) + pure_tensor = next(make_multiple_bounding_boxes()).as_subclass(torch.Tensor) with pytest.raises(ValueError, match=re.escape("`format` and `canvas_size` has to be passed")): F.clamp_bounding_boxes(pure_tensor, **metadata) @@ -520,7 +516,7 @@ def test_pure_tensor_insufficient_metadata(self, metadata): ], ) def test_datapoint_explicit_metadata(self, metadata): - datapoint = next(make_bounding_boxes()) + datapoint = next(make_multiple_bounding_boxes()) with pytest.raises(ValueError, match=re.escape("`format` and `canvas_size` must not be passed")): F.clamp_bounding_boxes(datapoint, **metadata) @@ -530,8 +526,8 @@ class TestConvertFormatBoundingBoxes: @pytest.mark.parametrize( ("inpt", "old_format"), [ - (next(make_bounding_boxes()), None), - 
(next(make_bounding_boxes()).as_subclass(torch.Tensor), datapoints.BoundingBoxFormat.XYXY), + (next(make_multiple_bounding_boxes()), None), + (next(make_multiple_bounding_boxes()).as_subclass(torch.Tensor), datapoints.BoundingBoxFormat.XYXY), ], ) def test_missing_new_format(self, inpt, old_format): @@ -539,13 +535,13 @@ def test_missing_new_format(self, inpt, old_format): F.convert_format_bounding_boxes(inpt, old_format) def test_pure_tensor_insufficient_metadata(self): - pure_tensor = next(make_bounding_boxes()).as_subclass(torch.Tensor) + pure_tensor = next(make_multiple_bounding_boxes()).as_subclass(torch.Tensor) with pytest.raises(ValueError, match=re.escape("`old_format` has to be passed")): F.convert_format_bounding_boxes(pure_tensor, new_format=datapoints.BoundingBoxFormat.CXCYWH) def test_datapoint_explicit_metadata(self): - datapoint = next(make_bounding_boxes()) + datapoint = next(make_multiple_bounding_boxes()) with pytest.raises(ValueError, match=re.escape("`old_format` must not be passed")): F.convert_format_bounding_boxes( @@ -736,7 +732,7 @@ def _compute_expected_canvas_size(bbox, padding_): height, width = bbox.canvas_size return height + pad_up + pad_down, width + pad_left + pad_right - for bboxes in make_bounding_boxes(extra_dims=((4,),)): + for bboxes in make_multiple_bounding_boxes(extra_dims=((4,),)): bboxes = bboxes.to(device) bboxes_format = bboxes.format bboxes_canvas_size = bboxes.canvas_size @@ -822,7 +818,7 @@ def _compute_expected_bbox(bbox, format_, canvas_size_, pcoeffs_): pcoeffs = _get_perspective_coeffs(startpoints, endpoints) inv_pcoeffs = _get_perspective_coeffs(endpoints, startpoints) - for bboxes in make_bounding_boxes(spatial_size=canvas_size, extra_dims=((4,),)): + for bboxes in make_multiple_bounding_boxes(spatial_size=canvas_size, extra_dims=((4,),)): bboxes = bboxes.to(device) output_bboxes = F.perspective_bounding_boxes( @@ -870,7 +866,7 @@ def _compute_expected_bbox(bbox, format_, canvas_size_, output_size_): out_bbox = clamp_bounding_boxes(out_bbox, format=format_, canvas_size=output_size) return out_bbox.to(dtype=dtype, device=bbox.device) - for bboxes in make_bounding_boxes(extra_dims=((4,),)): + for bboxes in make_multiple_bounding_boxes(extra_dims=((4,),)): bboxes = bboxes.to(device) bboxes_format = bboxes.format bboxes_canvas_size = bboxes.canvas_size diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py index c51b7c7555f..f57736e5abd 100644 --- a/test/test_transforms_v2_refactored.py +++ b/test/test_transforms_v2_refactored.py @@ -19,7 +19,7 @@ cpu_and_cuda, freeze_rng_state, ignore_jit_no_profile_information_warning, - make_bounding_box, + make_bounding_boxes, make_detection_mask, make_image, make_image_pil, @@ -456,7 +456,7 @@ def test_kernel_bounding_boxes(self, format, size, use_max_size, dtype, device): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return - bounding_boxes = make_bounding_box( + bounding_boxes = make_bounding_boxes( format=format, canvas_size=self.INPUT_SIZE, dtype=dtype, @@ -481,7 +481,7 @@ def test_kernel_video(self): @pytest.mark.parametrize("size", OUTPUT_SIZES) @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) def test_functional(self, size, make_input): check_functional( @@ -514,7 +514,7 @@ def test_functional_signature(self, 
kernel, input_type): make_image_tensor, make_image_pil, make_image, - make_bounding_box, + make_bounding_boxes, make_segmentation_mask, make_detection_mask, make_video, @@ -579,7 +579,7 @@ def test_bounding_boxes_correctness(self, format, size, use_max_size, fn): if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): return - bounding_boxes = make_bounding_box(format=format, canvas_size=self.INPUT_SIZE) + bounding_boxes = make_bounding_boxes(format=format, canvas_size=self.INPUT_SIZE) actual = fn(bounding_boxes, size=size, **max_size_kwarg) expected = self._reference_resize_bounding_boxes(bounding_boxes, size=size, **max_size_kwarg) @@ -618,7 +618,7 @@ def test_functional_pil_antialias_warning(self): make_image_tensor, make_image_pil, make_image, - make_bounding_box, + make_bounding_boxes, make_segmentation_mask, make_detection_mask, make_video, @@ -687,7 +687,7 @@ def test_transform_unknown_size_error(self): make_image_tensor, make_image_pil, make_image, - make_bounding_box, + make_bounding_boxes, make_segmentation_mask, make_detection_mask, make_video, @@ -714,7 +714,7 @@ def test_noop(self, size, make_input): make_image_tensor, make_image_pil, make_image, - make_bounding_box, + make_bounding_boxes, make_segmentation_mask, make_detection_mask, make_video, @@ -743,7 +743,7 @@ def test_kernel_image_tensor(self, dtype, device): @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_bounding_boxes(self, format, dtype, device): - bounding_boxes = make_bounding_box(format=format, dtype=dtype, device=device) + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) check_kernel( F.horizontal_flip_bounding_boxes, bounding_boxes, @@ -760,7 +760,7 @@ def test_kernel_video(self): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) def test_functional(self, make_input): check_functional(F.horizontal_flip, make_input()) @@ -781,7 +781,7 @@ def test_functional_signature(self, kernel, input_type): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform(self, make_input, device): @@ -821,7 +821,7 @@ def _reference_horizontal_flip_bounding_boxes(self, bounding_boxes): "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] ) def test_bounding_boxes_correctness(self, format, fn): - bounding_boxes = make_bounding_box(format=format) + bounding_boxes = make_bounding_boxes(format=format) actual = fn(bounding_boxes) expected = self._reference_horizontal_flip_bounding_boxes(bounding_boxes) @@ -830,7 +830,7 @@ def test_bounding_boxes_correctness(self, format, fn): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform_noop(self, make_input, device): @@ -917,7 +917,7 @@ def test_kernel_image_tensor(self, param, 
value, dtype, device): @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_bounding_boxes(self, param, value, format, dtype, device): - bounding_boxes = make_bounding_box(format=format, dtype=dtype, device=device) + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) self._check_kernel( F.affine_bounding_boxes, bounding_boxes, @@ -936,7 +936,7 @@ def test_kernel_video(self): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) def test_functional(self, make_input): check_functional(F.affine, make_input(), **self._MINIMAL_AFFINE_KWARGS) @@ -957,7 +957,7 @@ def test_functional_signature(self, kernel, input_type): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform(self, make_input, device): @@ -1076,7 +1076,7 @@ def _reference_affine_bounding_boxes(self, bounding_boxes, *, angle, translate, @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"]) @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) def test_functional_bounding_boxes_correctness(self, format, angle, translate, scale, shear, center): - bounding_boxes = make_bounding_box(format=format) + bounding_boxes = make_bounding_boxes(format=format) actual = F.affine( bounding_boxes, @@ -1101,7 +1101,7 @@ def test_functional_bounding_boxes_correctness(self, format, angle, translate, s @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) @pytest.mark.parametrize("seed", list(range(5))) def test_transform_bounding_boxes_correctness(self, format, center, seed): - bounding_boxes = make_bounding_box(format=format) + bounding_boxes = make_bounding_boxes(format=format) transform = transforms.RandomAffine(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, center=center) @@ -1208,7 +1208,7 @@ def test_kernel_image_tensor(self, dtype, device): @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_bounding_boxes(self, format, dtype, device): - bounding_boxes = make_bounding_box(format=format, dtype=dtype, device=device) + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) check_kernel( F.vertical_flip_bounding_boxes, bounding_boxes, @@ -1225,7 +1225,7 @@ def test_kernel_video(self): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) def test_functional(self, make_input): check_functional(F.vertical_flip, make_input()) @@ -1246,7 +1246,7 @@ def test_functional_signature(self, kernel, input_type): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform(self, 
make_input, device): @@ -1282,7 +1282,7 @@ def _reference_vertical_flip_bounding_boxes(self, bounding_boxes): @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) def test_bounding_boxes_correctness(self, format, fn): - bounding_boxes = make_bounding_box(format=format) + bounding_boxes = make_bounding_boxes(format=format) actual = fn(bounding_boxes) expected = self._reference_vertical_flip_bounding_boxes(bounding_boxes) @@ -1291,7 +1291,7 @@ def test_bounding_boxes_correctness(self, format, fn): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform_noop(self, make_input, device): @@ -1356,7 +1356,7 @@ def test_kernel_bounding_boxes(self, param, value, format, dtype, device): if param != "angle": kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] - bounding_boxes = make_bounding_box(format=format, dtype=dtype, device=device) + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) check_kernel( F.rotate_bounding_boxes, @@ -1375,7 +1375,7 @@ def test_kernel_video(self): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) def test_functional(self, make_input): check_functional(F.rotate, make_input(), **self._MINIMAL_AFFINE_KWARGS) @@ -1396,7 +1396,7 @@ def test_functional_signature(self, kernel, input_type): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_transform(self, make_input, device): @@ -1490,7 +1490,7 @@ def _reference_rotate_bounding_boxes(self, bounding_boxes, *, angle, expand, cen @pytest.mark.parametrize("expand", [False]) @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) def test_functional_bounding_boxes_correctness(self, format, angle, expand, center): - bounding_boxes = make_bounding_box(format=format) + bounding_boxes = make_bounding_boxes(format=format) actual = F.rotate(bounding_boxes, angle=angle, expand=expand, center=center) expected = self._reference_rotate_bounding_boxes(bounding_boxes, angle=angle, expand=expand, center=center) @@ -1503,7 +1503,7 @@ def test_functional_bounding_boxes_correctness(self, format, angle, expand, cent @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) @pytest.mark.parametrize("seed", list(range(5))) def test_transform_bounding_boxes_correctness(self, format, expand, center, seed): - bounding_boxes = make_bounding_box(format=format) + bounding_boxes = make_bounding_boxes(format=format) transform = transforms.RandomRotation(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, expand=expand, center=center) @@ -1652,7 +1652,7 @@ def test_functional(self, make_input, input_dtype, output_dtype, device, scale): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image, make_bounding_box, 
make_segmentation_mask, make_video], + [make_image_tensor, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) @pytest.mark.parametrize("input_dtype", [torch.float32, torch.float64, torch.uint8]) @pytest.mark.parametrize("output_dtype", [torch.float32, torch.float64, torch.uint8]) @@ -1727,7 +1727,7 @@ def make_inpt_with_bbox_and_mask(self, make_input): mask_dtype = torch.bool sample = { "inpt": make_input(size=(H, W), dtype=inpt_dtype), - "bbox": make_bounding_box(canvas_size=(H, W), dtype=bbox_dtype), + "bbox": make_bounding_boxes(canvas_size=(H, W), dtype=bbox_dtype), "mask": make_detection_mask(size=(H, W), dtype=mask_dtype), } @@ -2013,7 +2013,7 @@ def test_get_num_channels(self, kernel, make_input): (F.get_size_image, make_image_tensor), (F._get_size_image_pil, make_image_pil), (F.get_size_image, make_image), - (F.get_size_bounding_boxes, make_bounding_box), + (F.get_size_bounding_boxes, make_bounding_boxes), (F.get_size_mask, make_detection_mask), (F.get_size_mask, make_segmentation_mask), (F.get_size_video, make_video), @@ -2043,15 +2043,15 @@ def test_get_num_frames(self, kernel, make_input): @pytest.mark.parametrize( ("functional", "make_input"), [ - (F.get_dimensions, make_bounding_box), + (F.get_dimensions, make_bounding_boxes), (F.get_dimensions, make_detection_mask), (F.get_dimensions, make_segmentation_mask), - (F.get_num_channels, make_bounding_box), + (F.get_num_channels, make_bounding_boxes), (F.get_num_channels, make_detection_mask), (F.get_num_channels, make_segmentation_mask), (F.get_num_frames, make_image_pil), (F.get_num_frames, make_image), - (F.get_num_frames, make_bounding_box), + (F.get_num_frames, make_bounding_boxes), (F.get_num_frames, make_detection_mask), (F.get_num_frames, make_segmentation_mask), ], @@ -2290,7 +2290,7 @@ def test_kernel_image_tensor(self, param, value, dtype, device): @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) @pytest.mark.parametrize("device", cpu_and_cuda()) def test_kernel_bounding_boxes(self, format, dtype, device): - bounding_boxes = make_bounding_box(format=format, dtype=dtype, device=device) + bounding_boxes = make_bounding_boxes(format=format, dtype=dtype, device=device) check_kernel( F.elastic_bounding_boxes, @@ -2311,7 +2311,7 @@ def test_kernel_video(self): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) def test_functional(self, make_input): input = make_input() @@ -2333,7 +2333,7 @@ def test_functional_signature(self, kernel, input_type): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) def test_displacement_error(self, make_input): input = make_input() @@ -2346,7 +2346,7 @@ def test_displacement_error(self, make_input): @pytest.mark.parametrize( "make_input", - [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + [make_image_tensor, make_image_pil, make_image, make_bounding_boxes, make_segmentation_mask, make_video], ) # ElasticTransform needs larger images to avoid the needed internal padding being larger than the actual image @pytest.mark.parametrize("size", [(163, 163), (72, 333), (313, 95)]) @@ -2363,7 +2363,7 @@ def 
test_correctness(self): "img_pil": make_image_pil(), "mask": make_detection_mask(), "video": make_video(), - "bbox": make_bounding_box(), + "bbox": make_bounding_boxes(), "str": "str", } diff --git a/test/test_transforms_v2_utils.py b/test/test_transforms_v2_utils.py index 0cfe0db7077..55825d652e6 100644 --- a/test/test_transforms_v2_utils.py +++ b/test/test_transforms_v2_utils.py @@ -4,7 +4,7 @@ import torch import torchvision.transforms.v2.utils -from common_utils import DEFAULT_SIZE, make_bounding_box, make_detection_mask, make_image +from common_utils import DEFAULT_SIZE, make_bounding_boxes, make_detection_mask, make_image from torchvision import datapoints from torchvision.transforms.v2.functional import to_pil_image @@ -12,7 +12,7 @@ IMAGE = make_image(DEFAULT_SIZE, color_space="RGB") -BOUNDING_BOX = make_bounding_box(DEFAULT_SIZE, format=datapoints.BoundingBoxFormat.XYXY) +BOUNDING_BOX = make_bounding_boxes(DEFAULT_SIZE, format=datapoints.BoundingBoxFormat.XYXY) MASK = make_detection_mask(DEFAULT_SIZE) diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index 903518627de..375c307324c 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -2,9 +2,9 @@ import pytest import torchvision.transforms.v2.functional as F -from common_utils import InfoBase, TestMark from torchvision import datapoints from transforms_v2_kernel_infos import KERNEL_INFOS, pad_xfail_jit_fill_condition +from transforms_v2_legacy_utils import InfoBase, TestMark __all__ = ["DispatcherInfo", "DISPATCHER_INFOS"] diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index acb9a857750..33813b6519d 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -7,7 +7,9 @@ import torch.testing import torchvision.ops import torchvision.transforms.v2.functional as F -from common_utils import ( +from torchvision import datapoints +from torchvision.transforms._functional_tensor import _max_value as get_max_value, _parse_pad_padding +from transforms_v2_legacy_utils import ( ArgsKwargs, combinations_grid, DEFAULT_PORTRAIT_SPATIAL_SIZE, @@ -26,8 +28,6 @@ mark_framework_limitation, TestMark, ) -from torchvision import datapoints -from torchvision.transforms._functional_tensor import _max_value as get_max_value, _parse_pad_padding __all__ = ["KernelInfo", "KERNEL_INFOS"] diff --git a/test/transforms_v2_legacy_utils.py b/test/transforms_v2_legacy_utils.py new file mode 100644 index 00000000000..bb8943a8889 --- /dev/null +++ b/test/transforms_v2_legacy_utils.py @@ -0,0 +1,633 @@ +""" +As the name implies, these are legacy utilities that are hopefully removed soon. The future of +transforms v2 testing is in test/test_transforms_v2_refactored.py. All new test should be +implemented there and must not use any of the utilities here. + +The following legacy modules depend on this module + +- transforms_v2_kernel_infos.py +- transforms_v2_dispatcher_infos.py +- test_transforms_v2_functional.py +- test_transforms_v2_consistency.py +- test_transforms.py + +When all the logic is ported from the files above to test_transforms_v2_refactored.py, delete +all the legacy modules including this one and drop the _refactored prefix from the name. 
+""" + +import collections.abc +import dataclasses +import enum +import itertools +import pathlib +from collections import defaultdict +from typing import Callable, Sequence, Tuple, Union + +import PIL.Image +import pytest +import torch + +from torchvision import datapoints +from torchvision.transforms._functional_tensor import _max_value as get_max_value +from torchvision.transforms.v2.functional import to_dtype_image, to_image, to_pil_image + + +def combinations_grid(**kwargs): + """Creates a grid of input combinations. + + Each element in the returned sequence is a dictionary containing one possible combination as values. + + Example: + >>> combinations_grid(foo=("bar", "baz"), spam=("eggs", "ham")) + [ + {'foo': 'bar', 'spam': 'eggs'}, + {'foo': 'bar', 'spam': 'ham'}, + {'foo': 'baz', 'spam': 'eggs'}, + {'foo': 'baz', 'spam': 'ham'} + ] + """ + return [dict(zip(kwargs.keys(), values)) for values in itertools.product(*kwargs.values())] + + +DEFAULT_SIZE = (17, 11) + +NUM_CHANNELS_MAP = { + "GRAY": 1, + "GRAY_ALPHA": 2, + "RGB": 3, + "RGBA": 4, +} + + +def make_image( + size=DEFAULT_SIZE, + *, + color_space="RGB", + batch_dims=(), + dtype=None, + device="cpu", + memory_format=torch.contiguous_format, +): + num_channels = NUM_CHANNELS_MAP[color_space] + dtype = dtype or torch.uint8 + max_value = get_max_value(dtype) + data = torch.testing.make_tensor( + (*batch_dims, num_channels, *size), + low=0, + high=max_value, + dtype=dtype, + device=device, + memory_format=memory_format, + ) + if color_space in {"GRAY_ALPHA", "RGBA"}: + data[..., -1, :, :] = max_value + + return datapoints.Image(data) + + +def make_image_tensor(*args, **kwargs): + return make_image(*args, **kwargs).as_subclass(torch.Tensor) + + +def make_image_pil(*args, **kwargs): + return to_pil_image(make_image(*args, **kwargs)) + + +def make_bounding_boxes( + canvas_size=DEFAULT_SIZE, + *, + format=datapoints.BoundingBoxFormat.XYXY, + batch_dims=(), + dtype=None, + device="cpu", +): + def sample_position(values, max_value): + # We cannot use torch.randint directly here, because it only allows integer scalars as values for low and high. + # However, if we have batch_dims, we need tensors as limits. + return torch.stack([torch.randint(max_value - v, ()) for v in values.flatten().tolist()]).reshape(values.shape) + + if isinstance(format, str): + format = datapoints.BoundingBoxFormat[format] + + dtype = dtype or torch.float32 + + if any(dim == 0 for dim in batch_dims): + return datapoints.BoundingBoxes( + torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, canvas_size=canvas_size + ) + + h, w = [torch.randint(1, c, batch_dims) for c in canvas_size] + y = sample_position(h, canvas_size[0]) + x = sample_position(w, canvas_size[1]) + + if format is datapoints.BoundingBoxFormat.XYWH: + parts = (x, y, w, h) + elif format is datapoints.BoundingBoxFormat.XYXY: + x1, y1 = x, y + x2 = x1 + w + y2 = y1 + h + parts = (x1, y1, x2, y2) + elif format is datapoints.BoundingBoxFormat.CXCYWH: + cx = x + w / 2 + cy = y + h / 2 + parts = (cx, cy, w, h) + else: + raise ValueError(f"Format {format} is not supported") + + return datapoints.BoundingBoxes( + torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, canvas_size=canvas_size + ) + + +def make_detection_mask(size=DEFAULT_SIZE, *, num_objects=5, batch_dims=(), dtype=None, device="cpu"): + """Make a "detection" mask, i.e. 
(*, N, H, W), where each object is encoded as one of N boolean masks""" + return datapoints.Mask( + torch.testing.make_tensor( + (*batch_dims, num_objects, *size), + low=0, + high=2, + dtype=dtype or torch.bool, + device=device, + ) + ) + + +def make_segmentation_mask(size=DEFAULT_SIZE, *, num_categories=10, batch_dims=(), dtype=None, device="cpu"): + """Make a "segmentation" mask, i.e. (*, H, W), where the category is encoded as pixel value""" + return datapoints.Mask( + torch.testing.make_tensor( + (*batch_dims, *size), + low=0, + high=num_categories, + dtype=dtype or torch.uint8, + device=device, + ) + ) + + +def make_video(size=DEFAULT_SIZE, *, num_frames=3, batch_dims=(), **kwargs): + return datapoints.Video(make_image(size, batch_dims=(*batch_dims, num_frames), **kwargs)) + + +def make_video_tensor(*args, **kwargs): + return make_video(*args, **kwargs).as_subclass(torch.Tensor) + + +DEFAULT_SQUARE_SPATIAL_SIZE = 15 +DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33) +DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9) +DEFAULT_SPATIAL_SIZES = ( + DEFAULT_LANDSCAPE_SPATIAL_SIZE, + DEFAULT_PORTRAIT_SPATIAL_SIZE, + DEFAULT_SQUARE_SPATIAL_SIZE, +) + + +def _parse_size(size, *, name="size"): + if size == "random": + raise ValueError("This should never happen") + elif isinstance(size, int) and size > 0: + return (size, size) + elif ( + isinstance(size, collections.abc.Sequence) + and len(size) == 2 + and all(isinstance(length, int) and length > 0 for length in size) + ): + return tuple(size) + else: + raise pytest.UsageError( + f"'{name}' can either be `'random'`, a positive integer, or a sequence of two positive integers," + f"but got {size} instead." + ) + + +def get_num_channels(color_space): + num_channels = NUM_CHANNELS_MAP.get(color_space) + if not num_channels: + raise pytest.UsageError(f"Can't determine the number of channels for color space {color_space}") + return num_channels + + +VALID_EXTRA_DIMS = ((), (4,), (2, 3)) +DEGENERATE_BATCH_DIMS = ((0,), (5, 0), (0, 5)) + +DEFAULT_EXTRA_DIMS = (*VALID_EXTRA_DIMS, *DEGENERATE_BATCH_DIMS) + + +def from_loader(loader_fn): + def wrapper(*args, **kwargs): + device = kwargs.pop("device", "cpu") + loader = loader_fn(*args, **kwargs) + return loader.load(device) + + return wrapper + + +def from_loaders(loaders_fn): + def wrapper(*args, **kwargs): + device = kwargs.pop("device", "cpu") + loaders = loaders_fn(*args, **kwargs) + for loader in loaders: + yield loader.load(device) + + return wrapper + + +@dataclasses.dataclass +class TensorLoader: + fn: Callable[[Sequence[int], torch.dtype, Union[str, torch.device]], torch.Tensor] + shape: Sequence[int] + dtype: torch.dtype + + def load(self, device): + return self.fn(self.shape, self.dtype, device) + + +@dataclasses.dataclass +class ImageLoader(TensorLoader): + spatial_size: Tuple[int, int] = dataclasses.field(init=False) + num_channels: int = dataclasses.field(init=False) + memory_format: torch.memory_format = torch.contiguous_format + canvas_size: Tuple[int, int] = dataclasses.field(init=False) + + def __post_init__(self): + self.spatial_size = self.canvas_size = self.shape[-2:] + self.num_channels = self.shape[-3] + + def load(self, device): + return self.fn(self.shape, self.dtype, device, memory_format=self.memory_format) + + +def make_image_loader( + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, + *, + color_space="RGB", + extra_dims=(), + dtype=torch.float32, + constant_alpha=True, + memory_format=torch.contiguous_format, +): + if not constant_alpha: + raise ValueError("This should never happen") + size = 
_parse_size(size) + num_channels = get_num_channels(color_space) + + def fn(shape, dtype, device, memory_format): + *batch_dims, _, height, width = shape + return make_image( + (height, width), + color_space=color_space, + batch_dims=batch_dims, + dtype=dtype, + device=device, + memory_format=memory_format, + ) + + return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype, memory_format=memory_format) + + +def make_image_loaders( + *, + sizes=DEFAULT_SPATIAL_SIZES, + color_spaces=( + "GRAY", + "GRAY_ALPHA", + "RGB", + "RGBA", + ), + extra_dims=DEFAULT_EXTRA_DIMS, + dtypes=(torch.float32, torch.float64, torch.uint8), + constant_alpha=True, +): + for params in combinations_grid(size=sizes, color_space=color_spaces, extra_dims=extra_dims, dtype=dtypes): + yield make_image_loader(**params, constant_alpha=constant_alpha) + + +make_images = from_loaders(make_image_loaders) + + +def make_image_loader_for_interpolation( + size=(233, 147), *, color_space="RGB", dtype=torch.uint8, memory_format=torch.contiguous_format +): + size = _parse_size(size) + num_channels = get_num_channels(color_space) + + def fn(shape, dtype, device, memory_format): + height, width = shape[-2:] + + image_pil = ( + PIL.Image.open(pathlib.Path(__file__).parent / "assets" / "encode_jpeg" / "grace_hopper_517x606.jpg") + .resize((width, height)) + .convert( + { + "GRAY": "L", + "GRAY_ALPHA": "LA", + "RGB": "RGB", + "RGBA": "RGBA", + }[color_space] + ) + ) + + image_tensor = to_image(image_pil) + if memory_format == torch.contiguous_format: + image_tensor = image_tensor.to(device=device, memory_format=memory_format, copy=True) + else: + image_tensor = image_tensor.to(device=device) + image_tensor = to_dtype_image(image_tensor, dtype=dtype, scale=True) + + return datapoints.Image(image_tensor) + + return ImageLoader(fn, shape=(num_channels, *size), dtype=dtype, memory_format=memory_format) + + +def make_image_loaders_for_interpolation( + sizes=((233, 147),), + color_spaces=("RGB",), + dtypes=(torch.uint8,), + memory_formats=(torch.contiguous_format, torch.channels_last), +): + for params in combinations_grid(size=sizes, color_space=color_spaces, dtype=dtypes, memory_format=memory_formats): + yield make_image_loader_for_interpolation(**params) + + +@dataclasses.dataclass +class BoundingBoxesLoader(TensorLoader): + format: datapoints.BoundingBoxFormat + spatial_size: Tuple[int, int] + canvas_size: Tuple[int, int] = dataclasses.field(init=False) + + def __post_init__(self): + self.canvas_size = self.spatial_size + + +def make_bounding_box_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32): + if isinstance(format, str): + format = datapoints.BoundingBoxFormat[format] + + spatial_size = _parse_size(spatial_size, name="spatial_size") + + def fn(shape, dtype, device): + *batch_dims, num_coordinates = shape + if num_coordinates != 4: + raise pytest.UsageError() + + return make_bounding_boxes( + format=format, canvas_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device + ) + + return BoundingBoxesLoader(fn, shape=(*extra_dims[-1:], 4), dtype=dtype, format=format, spatial_size=spatial_size) + + +def make_bounding_box_loaders( + *, + extra_dims=tuple(d for d in DEFAULT_EXTRA_DIMS if len(d) < 2), + formats=tuple(datapoints.BoundingBoxFormat), + spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, + dtypes=(torch.float32, torch.float64, torch.int64), +): + for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes): + yield 
make_bounding_box_loader(**params, spatial_size=spatial_size) + + +make_multiple_bounding_boxes = from_loaders(make_bounding_box_loaders) + + +class MaskLoader(TensorLoader): + pass + + +def make_detection_mask_loader(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_objects=5, extra_dims=(), dtype=torch.uint8): + # This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects + size = _parse_size(size) + + def fn(shape, dtype, device): + *batch_dims, num_objects, height, width = shape + return make_detection_mask( + (height, width), num_objects=num_objects, batch_dims=batch_dims, dtype=dtype, device=device + ) + + return MaskLoader(fn, shape=(*extra_dims, num_objects, *size), dtype=dtype) + + +def make_detection_mask_loaders( + sizes=DEFAULT_SPATIAL_SIZES, + num_objects=(1, 0, 5), + extra_dims=DEFAULT_EXTRA_DIMS, + dtypes=(torch.uint8,), +): + for params in combinations_grid(size=sizes, num_objects=num_objects, extra_dims=extra_dims, dtype=dtypes): + yield make_detection_mask_loader(**params) + + +make_detection_masks = from_loaders(make_detection_mask_loaders) + + +def make_segmentation_mask_loader( + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_categories=10, extra_dims=(), dtype=torch.uint8 +): + # This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values + size = _parse_size(size) + + def fn(shape, dtype, device): + *batch_dims, height, width = shape + return make_segmentation_mask( + (height, width), num_categories=num_categories, batch_dims=batch_dims, dtype=dtype, device=device + ) + + return MaskLoader(fn, shape=(*extra_dims, *size), dtype=dtype) + + +def make_segmentation_mask_loaders( + *, + sizes=DEFAULT_SPATIAL_SIZES, + num_categories=(1, 2, 10), + extra_dims=DEFAULT_EXTRA_DIMS, + dtypes=(torch.uint8,), +): + for params in combinations_grid(size=sizes, num_categories=num_categories, extra_dims=extra_dims, dtype=dtypes): + yield make_segmentation_mask_loader(**params) + + +make_segmentation_masks = from_loaders(make_segmentation_mask_loaders) + + +def make_mask_loaders( + *, + sizes=DEFAULT_SPATIAL_SIZES, + num_objects=(1, 0, 5), + num_categories=(1, 2, 10), + extra_dims=DEFAULT_EXTRA_DIMS, + dtypes=(torch.uint8,), +): + yield from make_detection_mask_loaders(sizes=sizes, num_objects=num_objects, extra_dims=extra_dims, dtypes=dtypes) + yield from make_segmentation_mask_loaders( + sizes=sizes, num_categories=num_categories, extra_dims=extra_dims, dtypes=dtypes + ) + + +make_masks = from_loaders(make_mask_loaders) + + +class VideoLoader(ImageLoader): + pass + + +def make_video_loader( + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, + *, + color_space="RGB", + num_frames=3, + extra_dims=(), + dtype=torch.uint8, +): + size = _parse_size(size) + + def fn(shape, dtype, device, memory_format): + *batch_dims, num_frames, _, height, width = shape + return make_video( + (height, width), + num_frames=num_frames, + batch_dims=batch_dims, + color_space=color_space, + dtype=dtype, + device=device, + memory_format=memory_format, + ) + + return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype) + + +def make_video_loaders( + *, + sizes=DEFAULT_SPATIAL_SIZES, + color_spaces=( + "GRAY", + "RGB", + ), + num_frames=(1, 0, 3), + extra_dims=DEFAULT_EXTRA_DIMS, + dtypes=(torch.uint8, torch.float32, torch.float64), +): + for params in combinations_grid( + size=sizes, color_space=color_spaces, num_frames=num_frames, extra_dims=extra_dims, dtype=dtypes + ): + yield make_video_loader(**params) + + 
+make_videos = from_loaders(make_video_loaders)
+
+
+class TestMark:
+    def __init__(
+        self,
+        # Tuple of test class name and test function name that identifies the test the mark is applied to. If there is
+        # no test class, i.e. a standalone test function, use `None`.
+        test_id,
+        # `pytest.mark.*` to apply, e.g. `pytest.mark.skip` or `pytest.mark.xfail`
+        mark,
+        *,
+        # Callable that will be passed an `ArgsKwargs` and should return a boolean to indicate if the mark will be
+        # applied. If omitted, the mark is always applied.
+        condition=None,
+    ):
+        self.test_id = test_id
+        self.mark = mark
+        self.condition = condition or (lambda args_kwargs: True)
+
+
+def mark_framework_limitation(test_id, reason, condition=None):
+    # The purpose of this function is to have a single entry point for skip marks that are only there because the test
+    # framework cannot handle the kernel in general or a specific parameter combination.
+    # As development progresses, we can change the `mark.skip` to `mark.xfail` from time to time to see if the skip is
+    # still justified.
+    # We don't want to use `mark.xfail` all the time, because that actually runs the test until an error happens. Thus,
+    # we would be wasting CI resources for no reason most of the time.
+    return TestMark(test_id, pytest.mark.skip(reason=reason), condition=condition)
+
+
+class InfoBase:
+    def __init__(
+        self,
+        *,
+        # Identifier of the info that shows up in the parametrization.
+        id,
+        # Test markers that will be (conditionally) applied to an `ArgsKwargs` parametrization.
+        # See the `TestMark` class for details.
+        test_marks=None,
+        # Additional parameters, e.g. `rtol=1e-3`, passed to `assert_close`. Keys are a 3-tuple of `test_id` (see
+        # `TestMark`), the dtype, and the device.
+        closeness_kwargs=None,
+    ):
+        self.id = id
+
+        self.test_marks = test_marks or []
+        test_marks_map = defaultdict(list)
+        for test_mark in self.test_marks:
+            test_marks_map[test_mark.test_id].append(test_mark)
+        self._test_marks_map = dict(test_marks_map)
+
+        self.closeness_kwargs = closeness_kwargs or dict()
+
+    def get_marks(self, test_id, args_kwargs):
+        return [
+            test_mark.mark for test_mark in self._test_marks_map.get(test_id, []) if test_mark.condition(args_kwargs)
+        ]
+
+    def get_closeness_kwargs(self, test_id, *, dtype, device):
+        if not (isinstance(test_id, tuple) and len(test_id) == 2):
+            msg = "`test_id` should be a `Tuple[Optional[str], str]` denoting the test class and function name"
+            if callable(test_id):
+                msg += ". Did you forget to add the `test_id` fixture to the parameters of the test?"
+            else:
+                msg += f", but got {test_id} instead."
+            raise pytest.UsageError(msg)
+        if isinstance(device, torch.device):
+            device = device.type
+        return self.closeness_kwargs.get((test_id, dtype, device), dict())
+
+
+class ArgsKwargs:
+    def __init__(self, *args, **kwargs):
+        self.args = args
+        self.kwargs = kwargs
+
+    def __iter__(self):
+        yield self.args
+        yield self.kwargs
+
+    def load(self, device="cpu"):
+        return ArgsKwargs(
+            *(arg.load(device) if isinstance(arg, TensorLoader) else arg for arg in self.args),
+            **{
+                keyword: arg.load(device) if isinstance(arg, TensorLoader) else arg
+                for keyword, arg in self.kwargs.items()
+            },
+        )
+
+
+def parametrized_error_message(*args, **kwargs):
+    def to_str(obj):
+        if isinstance(obj, torch.Tensor) and obj.numel() > 30:
+            return f"tensor(shape={list(obj.shape)}, dtype={obj.dtype}, device={obj.device})"
+        elif isinstance(obj, enum.Enum):
+            return f"{type(obj).__name__}.{obj.name}"
+        else:
+            return repr(obj)
+
+    if args or kwargs:
+        postfix = "\n".join(
+            [
+                "",
+                "Failure happened for the following parameters:",
+                "",
+                *[to_str(arg) for arg in args],
+                *[f"{name}={to_str(kwarg)}" for name, kwarg in kwargs.items()],
+            ]
+        )
+    else:
+        postfix = ""
+
+    def wrapper(msg):
+        return msg + postfix
+
+    return wrapper