diff --git a/.clang-format b/.clang-format index 6d0ab740db4..95d60445f4a 100644 --- a/.clang-format +++ b/.clang-format @@ -60,9 +60,6 @@ MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None -ObjCBlockIndentWidth: 2 -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: false PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 @@ -85,4 +82,11 @@ SpacesInSquareBrackets: false Standard: Cpp11 TabWidth: 8 UseTab: Never +--- +Language: ObjC +ColumnLimit: 120 +AlignAfterOpenBracket: Align +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false ... diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index eec93854788..b9754e29b1c 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -7,3 +7,5 @@ d367a01a18a3ae6bee13d8be3b63fd6a581ea46f # Upgrade usort to 1.0.2 and black to 22.3.0 (#5106) 6ca9c76adb6daf2695d603ad623a9cf1c4f4806f +# Fix unnecessary exploded black formatting (#7709) +a335d916db0694770e8152f41e19195de3134523 diff --git a/.github/scripts/setup-env.sh b/.github/scripts/setup-env.sh index d102735909e..e4af4e7c61a 100755 --- a/.github/scripts/setup-env.sh +++ b/.github/scripts/setup-env.sh @@ -54,7 +54,11 @@ echo '::endgroup::' if [[ "${OS_TYPE}" == windows && "${GPU_ARCH_TYPE}" == cuda ]]; then echo '::group::Install VisualStudio CUDA extensions on Windows' - TARGET_DIR="/c/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/MSBuild/Microsoft/VC/v160/BuildCustomizations" + if [[ "${VC_YEAR:-}" == "2022" ]]; then + TARGET_DIR="/c/Program Files (x86)/Microsoft Visual Studio/2022/BuildTools/MSBuild/Microsoft/VC/v170/BuildCustomizations" + else + TARGET_DIR="/c/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/MSBuild/Microsoft/VC/v160/BuildCustomizations" + fi mkdir -p "${TARGET_DIR}" cp -r "${CUDA_HOME}/MSBuildExtensions/"* "${TARGET_DIR}" echo '::endgroup::' diff --git a/.github/workflows/build-cmake.yml b/.github/workflows/build-cmake.yml index 06bd4de753e..3871dca340f 100644 --- a/.github/workflows/build-cmake.yml +++ b/.github/workflows/build-cmake.yml @@ -74,9 +74,9 @@ jobs: script: | set -euo pipefail - source packaging/windows/internal/vc_install_helper.sh - export PYTHON_VERSION=3.8 + export VC_YEAR=2022 + export VSDEVCMD_ARGS="" export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }} export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cd6011b4ad4..22e1a4ac18d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -98,6 +98,8 @@ jobs: set -euxo pipefail export PYTHON_VERSION=${{ matrix.python-version }} + export VC_YEAR=2019 + export VSDEVCMD_ARGS="" export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }} export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 343df7f1021..762ebf6fce0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,6 +6,8 @@ repos: - id: check-toml - id: check-yaml exclude: packaging/.* + args: + - --allow-multiple-documents - id: mixed-line-ending args: [--fix=lf] - id: end-of-file-fixer diff --git a/docs/source/conf.py b/docs/source/conf.py index 4bb75fe6eeb..7b3e9e8a7f3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -140,7 +140,7 @@ "logo_only": True, "pytorch_project": "docs", "navigation_with_keys": True, - "analytics_id": "UA-117752657-2", + "analytics_id": "GTM-T8XT4PS", } html_logo = 
"_static/img/pytorch-logo-dark.svg" diff --git a/packaging/wheel/relocate.py b/packaging/wheel/relocate.py index e6a4ef9d458..6a8d35f1a6e 100644 --- a/packaging/wheel/relocate.py +++ b/packaging/wheel/relocate.py @@ -2,7 +2,6 @@ import glob import hashlib -import io # Standard library imports import os @@ -65,21 +64,12 @@ PYTHON_VERSION = sys.version_info -def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE): - """Yield pieces of data from a file-like object until EOF.""" - while True: - chunk = file.read(size) - if not chunk: - break - yield chunk - - def rehash(path, blocksize=1 << 20): """Return (hash, length) for path using hashlib.sha256()""" h = hashlib.sha256() length = 0 with open(path, "rb") as f: - for block in read_chunks(f, size=blocksize): + while block := f.read(blocksize): length += len(block) h.update(block) digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=") diff --git a/packaging/windows/internal/vc_env_helper.bat b/packaging/windows/internal/vc_env_helper.bat index e85a372f93d..d3484a66e9f 100644 --- a/packaging/windows/internal/vc_env_helper.bat +++ b/packaging/windows/internal/vc_env_helper.bat @@ -1,7 +1,11 @@ @echo on -set VC_VERSION_LOWER=16 -set VC_VERSION_UPPER=17 +set VC_VERSION_LOWER=17 +set VC_VERSION_UPPER=18 +if "%VC_YEAR%" == "2019" ( + set VC_VERSION_LOWER=16 + set VC_VERSION_UPPER=17 +) if "%VC_YEAR%" == "2017" ( set VC_VERSION_LOWER=15 set VC_VERSION_UPPER=16 diff --git a/packaging/windows/internal/vc_install_helper.sh b/packaging/windows/internal/vc_install_helper.sh deleted file mode 100644 index 251509ae194..00000000000 --- a/packaging/windows/internal/vc_install_helper.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -set -ex - -export VC_YEAR=2019 -export VSDEVCMD_ARGS="" diff --git a/test/assets/toosmall_png/heapbof.png b/test/assets/toosmall_png/heapbof.png new file mode 100644 index 00000000000..e720d183342 Binary files /dev/null and b/test/assets/toosmall_png/heapbof.png differ diff --git a/test/common_utils.py b/test/common_utils.py index 1d0b82a827c..72ecf104301 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -7,9 +7,11 @@ import os import pathlib import random +import re import shutil import sys import tempfile +import warnings from collections import defaultdict from subprocess import CalledProcessError, check_output, STDOUT from typing import Callable, Sequence, Tuple, Union @@ -25,7 +27,7 @@ from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair from torchvision import datapoints, io from torchvision.transforms._functional_tensor import _max_value as get_max_value -from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_tensor +from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_pil, to_image_tensor IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"]) @@ -122,7 +124,7 @@ def disable_console_output(): yield -def cpu_and_gpu(): +def cpu_and_cuda(): import pytest # noqa return ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda)) @@ -397,6 +399,9 @@ def load(self, device="cpu"): ) +# new v2 default +DEFAULT_SIZE = (17, 11) +# old v2 defaults DEFAULT_SQUARE_SPATIAL_SIZE = 15 DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33) DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9) @@ -404,13 +409,12 @@ def load(self, device="cpu"): DEFAULT_LANDSCAPE_SPATIAL_SIZE, DEFAULT_PORTRAIT_SPATIAL_SIZE, DEFAULT_SQUARE_SPATIAL_SIZE, - "random", ) def _parse_spatial_size(size, *, 
name="size"): if size == "random": - return tuple(torch.randint(15, 33, (2,)).tolist()) + raise ValueError("This should never happen") elif isinstance(size, int) and size > 0: return (size, size) elif ( @@ -490,8 +494,40 @@ def get_num_channels(color_space): return num_channels +def make_image( + size=DEFAULT_SIZE, + *, + color_space="RGB", + batch_dims=(), + dtype=None, + device="cpu", + memory_format=torch.contiguous_format, +): + max_value = get_max_value(dtype) + data = torch.testing.make_tensor( + (*batch_dims, get_num_channels(color_space), *size), + low=0, + high=max_value, + dtype=dtype or torch.uint8, + device=device, + memory_format=memory_format, + ) + if color_space in {"GRAY_ALPHA", "RGBA"}: + data[..., -1, :, :] = max_value + + return datapoints.Image(data) + + +def make_image_tensor(*args, **kwargs): + return make_image(*args, **kwargs).as_subclass(torch.Tensor) + + +def make_image_pil(*args, **kwargs): + return to_image_pil(make_image(*args, **kwargs)) + + def make_image_loader( - size="random", + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, color_space="RGB", extra_dims=(), @@ -499,24 +535,25 @@ def make_image_loader( constant_alpha=True, memory_format=torch.contiguous_format, ): + if not constant_alpha: + raise ValueError("This should never happen") size = _parse_spatial_size(size) num_channels = get_num_channels(color_space) def fn(shape, dtype, device, memory_format): - max_value = get_max_value(dtype) - data = torch.testing.make_tensor( - shape, low=0, high=max_value, dtype=dtype, device=device, memory_format=memory_format + *batch_dims, _, height, width = shape + return make_image( + (height, width), + color_space=color_space, + batch_dims=batch_dims, + dtype=dtype, + device=device, + memory_format=memory_format, ) - if color_space in {"GRAY_ALPHA", "RGBA"} and constant_alpha: - data[..., -1, :, :] = max_value - return datapoints.Image(data) return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype, memory_format=memory_format) -make_image = from_loader(make_image_loader) - - def make_image_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, @@ -538,7 +575,7 @@ def make_image_loaders( def make_image_loader_for_interpolation( - size="random", *, color_space="RGB", dtype=torch.uint8, memory_format=torch.contiguous_format + size=(233, 147), *, color_space="RGB", dtype=torch.uint8, memory_format=torch.contiguous_format ): size = _parse_spatial_size(size) num_channels = get_num_channels(color_space) @@ -587,76 +624,114 @@ class BoundingBoxLoader(TensorLoader): spatial_size: Tuple[int, int] -def randint_with_tensor_bounds(arg1, arg2=None, **kwargs): - low, high = torch.broadcast_tensors( - *[torch.as_tensor(arg) for arg in ((0, arg1) if arg2 is None else (arg1, arg2))] +def make_bounding_box( + size=None, + *, + format=datapoints.BoundingBoxFormat.XYXY, + spatial_size=None, + batch_dims=(), + dtype=None, + device="cpu", +): + """ + size: Size of the actual bounding box, i.e. + - (box[3] - box[1], box[2] - box[0]) for XYXY + - (H, W) for XYWH and CXCYWH + spatial_size: Size of the reference object, e.g. an image. Corresponds to the .spatial_size attribute on + returned datapoints.BoundingBox + + To generate a valid joint sample, you need to set spatial_size here to the same value as size on the other maker + functions, e.g. + + .. 
code:: + + image = make_image(size=size) + bounding_box = make_bounding_box(spatial_size=size) + assert F.get_spatial_size(bounding_box) == F.get_spatial_size(image) + + For convenience, if both size and spatial_size are omitted, spatial_size defaults to the same value as size for all + other maker functions, e.g. + + .. code:: + + image = make_image() + bounding_box = make_bounding_box() + assert F.get_spatial_size(bounding_box) == F.get_spatial_size(image) + """ + + def sample_position(values, max_value): + # We cannot use torch.randint directly here, because it only allows integer scalars as values for low and high. + # However, if we have batch_dims, we need tensors as limits. + return torch.stack([torch.randint(max_value - v, ()) for v in values.flatten().tolist()]).reshape(values.shape) + + if isinstance(format, str): + format = datapoints.BoundingBoxFormat[format] + + if spatial_size is None: + if size is None: + spatial_size = DEFAULT_SIZE + else: + height, width = size + height_margin, width_margin = torch.randint(10, (2,)).tolist() + spatial_size = (height + height_margin, width + width_margin) + + dtype = dtype or torch.float32 + + if any(dim == 0 for dim in batch_dims): + return datapoints.BoundingBox( + torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size + ) + + if size is None: + h, w = [torch.randint(1, s, batch_dims) for s in spatial_size] + else: + h, w = [torch.full(batch_dims, s, dtype=torch.int) for s in size] + + y = sample_position(h, spatial_size[0]) + x = sample_position(w, spatial_size[1]) + + if format is datapoints.BoundingBoxFormat.XYWH: + parts = (x, y, w, h) + elif format is datapoints.BoundingBoxFormat.XYXY: + x1, y1 = x, y + x2 = x1 + w + y2 = y1 + h + parts = (x1, y1, x2, y2) + elif format is datapoints.BoundingBoxFormat.CXCYWH: + cx = x + w / 2 + cy = y + h / 2 + parts = (cx, cy, w, h) + else: + raise ValueError(f"Format {format} is not supported") + + return datapoints.BoundingBox( + torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size ) - return torch.stack( - [ - torch.randint(low_scalar, high_scalar, (), **kwargs) - for low_scalar, high_scalar in zip(low.flatten().tolist(), high.flatten().tolist()) - ] - ).reshape(low.shape) -def make_bounding_box_loader(*, extra_dims=(), format, spatial_size="random", dtype=torch.float32): +def make_bounding_box_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32): if isinstance(format, str): format = datapoints.BoundingBoxFormat[format] - if format not in { - datapoints.BoundingBoxFormat.XYXY, - datapoints.BoundingBoxFormat.XYWH, - datapoints.BoundingBoxFormat.CXCYWH, - }: - raise pytest.UsageError(f"Can't make bounding box in format {format}") spatial_size = _parse_spatial_size(spatial_size, name="spatial_size") def fn(shape, dtype, device): - *extra_dims, num_coordinates = shape + *batch_dims, num_coordinates = shape if num_coordinates != 4: raise pytest.UsageError() - if any(dim == 0 for dim in extra_dims): - return datapoints.BoundingBox( - torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size - ) - - height, width = spatial_size - - if format == datapoints.BoundingBoxFormat.XYXY: - x1 = torch.randint(0, width // 2, extra_dims) - y1 = torch.randint(0, height // 2, extra_dims) - x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1 - y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1 - parts = (x1, y1, x2, y2) - elif format 
== datapoints.BoundingBoxFormat.XYWH: - x = torch.randint(0, width // 2, extra_dims) - y = torch.randint(0, height // 2, extra_dims) - w = randint_with_tensor_bounds(1, width - x) - h = randint_with_tensor_bounds(1, height - y) - parts = (x, y, w, h) - else: # format == features.BoundingBoxFormat.CXCYWH: - cx = torch.randint(1, width - 1, extra_dims) - cy = torch.randint(1, height - 1, extra_dims) - w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1) - h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1) - parts = (cx, cy, w, h) - - return datapoints.BoundingBox( - torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size + return make_bounding_box( + format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device ) return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size) -make_bounding_box = from_loader(make_bounding_box_loader) - - def make_bounding_box_loaders( *, extra_dims=DEFAULT_EXTRA_DIMS, formats=tuple(datapoints.BoundingBoxFormat), - spatial_size="random", + spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtypes=(torch.float32, torch.float64, torch.int64), ): for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes): @@ -670,24 +745,35 @@ class MaskLoader(TensorLoader): pass -def make_detection_mask_loader(size="random", *, num_objects="random", extra_dims=(), dtype=torch.uint8): +def make_detection_mask(size=DEFAULT_SIZE, *, num_objects=5, batch_dims=(), dtype=None, device="cpu"): + """Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks""" + return datapoints.Mask( + torch.testing.make_tensor( + (*batch_dims, num_objects, *size), + low=0, + high=2, + dtype=dtype or torch.bool, + device=device, + ) + ) + + +def make_detection_mask_loader(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_objects=5, extra_dims=(), dtype=torch.uint8): # This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects size = _parse_spatial_size(size) - num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects def fn(shape, dtype, device): - data = torch.testing.make_tensor(shape, low=0, high=2, dtype=dtype, device=device) - return datapoints.Mask(data) + *batch_dims, num_objects, height, width = shape + return make_detection_mask( + (height, width), num_objects=num_objects, batch_dims=batch_dims, dtype=dtype, device=device + ) return MaskLoader(fn, shape=(*extra_dims, num_objects, *size), dtype=dtype) -make_detection_mask = from_loader(make_detection_mask_loader) - - def make_detection_mask_loaders( sizes=DEFAULT_SPATIAL_SIZES, - num_objects=(1, 0, "random"), + num_objects=(1, 0, 5), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8,), ): @@ -698,25 +784,38 @@ def make_detection_mask_loaders( make_detection_masks = from_loaders(make_detection_mask_loaders) -def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8): - # This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values - size = _parse_spatial_size(size) - num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories +def make_segmentation_mask(size=DEFAULT_SIZE, *, num_categories=10, batch_dims=(), dtype=None, device="cpu"): + """Make a "segmentation" mask, i.e. 
(*, H, W), where the category is encoded as the pixel value""" + return datapoints.Mask( + torch.testing.make_tensor( + (*batch_dims, *size), + low=0, + high=num_categories, + dtype=dtype or torch.uint8, + device=device, + ) + ) - def fn(shape, dtype, device): - data = torch.testing.make_tensor(shape, low=0, high=num_categories, dtype=dtype, device=device) - return datapoints.Mask(data) - return MaskLoader(fn, shape=(*extra_dims, *size), dtype=dtype) +def make_segmentation_mask_loader( + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_categories=10, extra_dims=(), dtype=torch.uint8 +): + # This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values + spatial_size = _parse_spatial_size(size) + def fn(shape, dtype, device): + *batch_dims, height, width = shape + return make_segmentation_mask( + (height, width), num_categories=num_categories, batch_dims=batch_dims, dtype=dtype, device=device + ) -make_segmentation_mask = from_loader(make_segmentation_mask_loader) + return MaskLoader(fn, shape=(*extra_dims, *spatial_size), dtype=dtype) def make_segmentation_mask_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, - num_categories=(1, 2, "random"), + num_categories=(1, 2, 10), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8,), ): @@ -730,8 +829,8 @@ def make_segmentation_mask_loaders( def make_mask_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, - num_objects=(1, 0, "random"), - num_categories=(1, 2, "random"), + num_objects=(1, 0, 5), + num_categories=(1, 2, 10), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8,), ): @@ -748,29 +847,35 @@ class VideoLoader(ImageLoader): pass +def make_video(size=DEFAULT_SIZE, *, num_frames=3, batch_dims=(), **kwargs): + return datapoints.Video(make_image(size, batch_dims=(*batch_dims, num_frames), **kwargs)) + + def make_video_loader( - size="random", + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, color_space="RGB", - num_frames="random", + num_frames=3, extra_dims=(), dtype=torch.uint8, ): size = _parse_spatial_size(size) - num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames def fn(shape, dtype, device, memory_format): - video = make_image( - size=shape[-2:], extra_dims=shape[:-3], dtype=dtype, device=device, memory_format=memory_format + *batch_dims, num_frames, _, height, width = shape + return make_video( + (height, width), + num_frames=num_frames, + batch_dims=batch_dims, + color_space=color_space, + dtype=dtype, + device=device, + memory_format=memory_format, ) - return datapoints.Video(video) return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype) -make_video = from_loader(make_video_loader) - - def make_video_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, @@ -778,7 +883,7 @@ def make_video_loaders( "GRAY", "RGB", ), - num_frames=(1, 0, "random"), + num_frames=(1, 0, 3), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8, torch.float32, torch.float64), ): @@ -880,3 +985,23 @@ def assert_run_python_script(source_code): raise RuntimeError(f"script errored with output:\n{e.output.decode()}") if out != b"": raise AssertionError(out.decode()) + + +@contextlib.contextmanager +def assert_no_warnings(): + # The name `catch_warnings` is a misnomer as the context manager does **not** catch any warnings, but rather scopes + # the warning filters. All changes that are made to the filters while in this context will be reset upon exit. 
+ with warnings.catch_warnings(): + warnings.simplefilter("error") + yield + + +@contextlib.contextmanager +def ignore_jit_no_profile_information_warning(): + # Calling a scripted object often triggers a warning like + # `UserWarning: operator() profile_node %$INT1 : int[] = prim::profile_ivalue($INT2) does not have profile information` + # with varying `INT1` and `INT2`. Since these are uninteresting for us and only clutter the test summary, we ignore + # them. + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message=re.escape("operator() profile_node %"), category=UserWarning) + yield diff --git a/test/conftest.py b/test/conftest.py index a9e8f1cda52..468587f1c9e 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -33,7 +33,7 @@ def pytest_collection_modifyitems(items): # The needs_cuda mark will exist if the test was explicitly decorated with # the @needs_cuda decorator. It will also exist if it was parametrized with a # parameter that has the mark: for example if a test is parametrized with - # @pytest.mark.parametrize('device', cpu_and_gpu()) + # @pytest.mark.parametrize('device', cpu_and_cuda()) # the "instances" of the tests where device == 'cuda' will have the 'needs_cuda' mark, # and the ones with device == 'cpu' won't have the mark. needs_cuda = item.get_closest_marker("needs_cuda") is not None diff --git a/test/test_datapoints.py b/test/test_datapoints.py index 39c05123333..1334fd7283b 100644 --- a/test/test_datapoints.py +++ b/test/test_datapoints.py @@ -1,5 +1,8 @@ +from copy import deepcopy + import pytest import torch +from common_utils import assert_equal from PIL import Image from torchvision import datapoints @@ -30,3 +33,154 @@ def test_bbox_instance(data, format): if isinstance(format, str): format = datapoints.BoundingBoxFormat[(format.upper())] assert bboxes.format == format + + +@pytest.mark.parametrize( + ("data", "input_requires_grad", "expected_requires_grad"), + [ + ([[[0.0, 1.0], [0.0, 1.0]]], None, False), + ([[[0.0, 1.0], [0.0, 1.0]]], False, False), + ([[[0.0, 1.0], [0.0, 1.0]]], True, True), + (torch.rand(3, 16, 16, requires_grad=False), None, False), + (torch.rand(3, 16, 16, requires_grad=False), False, False), + (torch.rand(3, 16, 16, requires_grad=False), True, True), + (torch.rand(3, 16, 16, requires_grad=True), None, True), + (torch.rand(3, 16, 16, requires_grad=True), False, False), + (torch.rand(3, 16, 16, requires_grad=True), True, True), + ], +) +def test_new_requires_grad(data, input_requires_grad, expected_requires_grad): + datapoint = datapoints.Image(data, requires_grad=input_requires_grad) + assert datapoint.requires_grad is expected_requires_grad + + +def test_isinstance(): + assert isinstance(datapoints.Image(torch.rand(3, 16, 16)), torch.Tensor) + + +def test_wrapping_no_copy(): + tensor = torch.rand(3, 16, 16) + image = datapoints.Image(tensor) + + assert image.data_ptr() == tensor.data_ptr() + + +def test_to_wrapping(): + image = datapoints.Image(torch.rand(3, 16, 16)) + + image_to = image.to(torch.float64) + + assert type(image_to) is datapoints.Image + assert image_to.dtype is torch.float64 + + +def test_to_datapoint_reference(): + tensor = torch.rand((3, 16, 16), dtype=torch.float64) + image = datapoints.Image(tensor) + + tensor_to = tensor.to(image) + + assert type(tensor_to) is torch.Tensor + assert tensor_to.dtype is torch.float64 + + +def test_clone_wrapping(): + image = datapoints.Image(torch.rand(3, 16, 16)) + + image_clone = image.clone() + + assert type(image_clone) is datapoints.Image + assert 
image_clone.data_ptr() != image.data_ptr() + + +def test_requires_grad__wrapping(): + image = datapoints.Image(torch.rand(3, 16, 16)) + + assert not image.requires_grad + + image_requires_grad = image.requires_grad_(True) + + assert type(image_requires_grad) is datapoints.Image + assert image.requires_grad + assert image_requires_grad.requires_grad + + +def test_detach_wrapping(): + image = datapoints.Image(torch.rand(3, 16, 16), requires_grad=True) + + image_detached = image.detach() + + assert type(image_detached) is datapoints.Image + + +def test_other_op_no_wrapping(): + image = datapoints.Image(torch.rand(3, 16, 16)) + + # any operation besides the ones listed in `Datapoint._NO_WRAPPING_EXCEPTIONS` will do here + output = image * 2 + + assert type(output) is torch.Tensor + + +@pytest.mark.parametrize( + "op", + [ + lambda t: t.numpy(), + lambda t: t.tolist(), + lambda t: t.max(dim=-1), + ], +) +def test_no_tensor_output_op_no_wrapping(op): + image = datapoints.Image(torch.rand(3, 16, 16)) + + output = op(image) + + assert type(output) is not datapoints.Image + + +def test_inplace_op_no_wrapping(): + image = datapoints.Image(torch.rand(3, 16, 16)) + + output = image.add_(0) + + assert type(output) is torch.Tensor + assert type(image) is datapoints.Image + + +def test_wrap_like(): + image = datapoints.Image(torch.rand(3, 16, 16)) + + # any operation besides the ones listed in `Datapoint._NO_WRAPPING_EXCEPTIONS` will do here + output = image * 2 + + image_new = datapoints.Image.wrap_like(image, output) + + assert type(image_new) is datapoints.Image + assert image_new.data_ptr() == output.data_ptr() + + +@pytest.mark.parametrize( + "datapoint", + [ + datapoints.Image(torch.rand(3, 16, 16)), + datapoints.Video(torch.rand(2, 3, 16, 16)), + datapoints.BoundingBox([0.0, 1.0, 2.0, 3.0], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10)), + datapoints.Mask(torch.randint(0, 256, (16, 16), dtype=torch.uint8)), + ], +) +@pytest.mark.parametrize("requires_grad", [False, True]) +def test_deepcopy(datapoint, requires_grad): + if requires_grad and not datapoint.dtype.is_floating_point: + return + + datapoint.requires_grad_(requires_grad) + + datapoint_deepcopied = deepcopy(datapoint) + + assert datapoint_deepcopied is not datapoint + assert datapoint_deepcopied.data_ptr() != datapoint.data_ptr() + assert_equal(datapoint_deepcopied, datapoint) + + assert type(datapoint_deepcopied) is type(datapoint) + assert datapoint_deepcopied.requires_grad is requires_grad + assert datapoint_deepcopied.is_leaf diff --git a/test/test_extended_models.py b/test/test_extended_models.py index 0866cc0f8a3..96a3fc5f8ed 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -103,17 +103,18 @@ def test_weights_deserializable(name): assert pickle.loads(pickle.dumps(weights)) is weights +def get_models_from_module(module): + return [ + v.__name__ + for k, v in module.__dict__.items() + if callable(v) and k[0].islower() and k[0] != "_" and k not in models._api.__all__ + ] + + @pytest.mark.parametrize( "module", [models, models.detection, models.quantization, models.segmentation, models.video, models.optical_flow] ) def test_list_models(module): - def get_models_from_module(module): - return [ - v.__name__ - for k, v in module.__dict__.items() - if callable(v) and k[0].islower() and k[0] != "_" and k not in models._api.__all__ - ] - a = set(get_models_from_module(module)) b = set(x.replace("quantized_", "") for x in models.list_models(module)) @@ -121,6 +122,65 @@ def 
get_models_from_module(module): assert a == b +@pytest.mark.parametrize( + "include_filters", + [ + None, + [], + (), + "", + "*resnet*", + ["*alexnet*"], + "*not-existing-model-for-test?", + ["*resnet*", "*alexnet*"], + ["*resnet*", "*alexnet*", "*not-existing-model-for-test?"], + ("*resnet*", "*alexnet*"), + set(["*resnet*", "*alexnet*"]), + ], +) +@pytest.mark.parametrize( + "exclude_filters", + [ + None, + [], + (), + "", + "*resnet*", + ["*alexnet*"], + ["*not-existing-model-for-test?"], + ["resnet34", "*not-existing-model-for-test?"], + ["resnet34", "*resnet1*"], + ("resnet34", "*resnet1*"), + set(["resnet34", "*resnet1*"]), + ], +) +def test_list_models_filters(include_filters, exclude_filters): + actual = set(models.list_models(models, include=include_filters, exclude=exclude_filters)) + classification_models = set(get_models_from_module(models)) + + if isinstance(include_filters, str): + include_filters = [include_filters] + if isinstance(exclude_filters, str): + exclude_filters = [exclude_filters] + + if include_filters: + expected = set() + for include_f in include_filters: + include_f = include_f.strip("*?") + expected = expected | set(x for x in classification_models if include_f in x) + else: + expected = classification_models + + if exclude_filters: + for exclude_f in exclude_filters: + exclude_f = exclude_f.strip("*?") + if exclude_f != "": + a_exclude = set(x for x in classification_models if exclude_f in x) + expected = expected - a_exclude + + assert expected == actual + + @pytest.mark.parametrize( "name, weight", [ diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py index 0e1cc648a19..fb3f5744e54 100644 --- a/test/test_functional_tensor.py +++ b/test/test_functional_tensor.py @@ -21,7 +21,7 @@ _create_data_batch, _test_fn_on_batch, assert_equal, - cpu_and_gpu, + cpu_and_cuda, needs_cuda, ) from torchvision.transforms import InterpolationMode @@ -34,7 +34,7 @@ ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("fn", [F.get_image_size, F.get_image_num_channels, F.get_dimensions]) def test_image_sizes(device, fn): script_F = torch.jit.script(fn) @@ -72,7 +72,7 @@ class TestRotate: scripted_rotate = torch.jit.script(F.rotate) IMG_W = 26 - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("height, width", [(7, 33), (26, IMG_W), (32, IMG_W)]) @pytest.mark.parametrize( "center", @@ -131,7 +131,7 @@ def test_rotate(self, device, height, width, center, dt, angle, expand, fill, fn f"{out_pil_tensor[0, :7, :7]}" ) - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dt", ALL_DTYPES) def test_rotate_batch(self, device, dt): if dt == torch.float16 and device == "cpu": @@ -157,7 +157,7 @@ class TestAffine: ALL_DTYPES = [None, torch.float32, torch.float64, torch.float16] scripted_affine = torch.jit.script(F.affine) - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("height, width", [(26, 26), (32, 26)]) @pytest.mark.parametrize("dt", ALL_DTYPES) def test_identity_map(self, device, height, width, dt): @@ -180,7 +180,7 @@ def test_identity_map(self, device, height, width, dt): ) assert_equal(tensor, out_tensor, msg=f"{out_tensor[0, :5, :5]} vs {tensor[0, :5, :5]}") - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", 
cpu_and_cuda()) @pytest.mark.parametrize("height, width", [(26, 26)]) @pytest.mark.parametrize("dt", ALL_DTYPES) @pytest.mark.parametrize( @@ -224,7 +224,7 @@ def test_square_rotations(self, device, height, width, dt, angle, config, fn): # Tolerance : less than 6% of different pixels assert ratio_diff_pixels < 0.06 - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("height, width", [(32, 26)]) @pytest.mark.parametrize("dt", ALL_DTYPES) @pytest.mark.parametrize("angle", [90, 45, 15, -30, -60, -120]) @@ -258,7 +258,7 @@ def test_rect_rotations(self, device, height, width, dt, angle, fn, center): # Tolerance : less than 3% of different pixels assert ratio_diff_pixels < 0.03 - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("height, width", [(26, 26), (32, 26)]) @pytest.mark.parametrize("dt", ALL_DTYPES) @pytest.mark.parametrize("t", [[10, 12], (-12, -13)]) @@ -283,7 +283,7 @@ def test_translations(self, device, height, width, dt, t, fn): _assert_equal_tensor_to_pil(out_tensor, out_pil_img) - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("height, width", [(26, 26), (32, 26)]) @pytest.mark.parametrize("dt", ALL_DTYPES) @pytest.mark.parametrize( @@ -293,24 +293,8 @@ def test_translations(self, device, height, width, dt, t, fn): (33, (5, -4), 1.0, [0.0, 0.0], [0, 0, 0]), (45, [-5, 4], 1.2, [0.0, 0.0], (1, 2, 3)), (33, (-4, -8), 2.0, [0.0, 0.0], [255, 255, 255]), - ( - 85, - (10, -10), - 0.7, - [0.0, 0.0], - [ - 1, - ], - ), - ( - 0, - [0, 0], - 1.0, - [ - 35.0, - ], - (2.0,), - ), + (85, (10, -10), 0.7, [0.0, 0.0], [1]), + (0, [0, 0], 1.0, [35.0], (2.0,)), (-25, [0, 0], 1.2, [0.0, 15.0], None), (-45, [-10, 0], 0.7, [2.0, 5.0], None), (-45, [-10, -10], 1.2, [4.0, 5.0], None), @@ -344,7 +328,7 @@ def test_all_ops(self, device, height, width, dt, a, t, s, sh, f, fn): tol = 0.06 if device == "cuda" else 0.05 assert ratio_diff_pixels < tol - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dt", ALL_DTYPES) def test_batches(self, device, dt): if dt == torch.float16 and device == "cpu": @@ -357,7 +341,7 @@ def test_batches(self, device, dt): _test_fn_on_batch(batch_tensors, F.affine, angle=-43, translate=[-3, 4], scale=1.2, shear=[4.0, 5.0]) - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) def test_interpolation_type(self, device): tensor, pil_img = _create_data(26, 26, device=device) @@ -389,22 +373,10 @@ def _get_data_dims_and_points_for_perspective(): return dims_and_points -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dims_and_points", _get_data_dims_and_points_for_perspective()) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) -@pytest.mark.parametrize( - "fill", - ( - None, - [0, 0, 0], - [1, 2, 3], - [255, 255, 255], - [ - 1, - ], - (2.0,), - ), -) +@pytest.mark.parametrize("fill", (None, [0, 0, 0], [1, 2, 3], [255, 255, 255], [1], (2.0,))) @pytest.mark.parametrize("fn", [F.perspective, torch.jit.script(F.perspective)]) def test_perspective_pil_vs_tensor(device, dims_and_points, dt, fill, fn): @@ -435,7 +407,7 @@ def test_perspective_pil_vs_tensor(device, dims_and_points, dt, fill, fn): assert ratio_diff_pixels 
< 0.05 -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dims_and_points", _get_data_dims_and_points_for_perspective()) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) def test_perspective_batch(device, dims_and_points, dt): @@ -473,21 +445,9 @@ def test_perspective_interpolation_type(): assert_equal(res1, res2) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) -@pytest.mark.parametrize( - "size", - [ - 32, - 26, - [ - 32, - ], - [32, 32], - (32, 32), - [26, 35], - ], -) +@pytest.mark.parametrize("size", [32, 26, [32], [32, 32], (32, 32), [26, 35]]) @pytest.mark.parametrize("max_size", [None, 34, 40, 1000]) @pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC, NEAREST, NEAREST_EXACT]) def test_resize(device, dt, size, max_size, interpolation): @@ -539,7 +499,7 @@ def test_resize(device, dt, size, max_size, interpolation): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_resize_asserts(device): tensor, pil_img = _create_data(26, 36, device=device) @@ -556,7 +516,7 @@ def test_resize_asserts(device): F.resize(img, size=32, max_size=32) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) @pytest.mark.parametrize("size", [[96, 72], [96, 420], [420, 72]]) @pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC]) @@ -609,21 +569,6 @@ def test_resize_antialias(device, dt, size, interpolation): assert_equal(resized_tensor, resize_result) -@needs_cuda -@pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC]) -def test_assert_resize_antialias(interpolation): - - # Checks implementation on very large scales - # and catch TORCH_CHECK inside PyTorch implementation - torch.manual_seed(12) - tensor, _ = _create_data(1000, 1000, device="cuda") - - # Error message is not yet updated in pytorch nightly - # with pytest.raises(RuntimeError, match=r"Provided interpolation parameters can not be handled"): - with pytest.raises(RuntimeError, match=r"Too much shared memory required"): - F.resize(tensor, size=(5, 5), interpolation=interpolation, antialias=True) - - def test_resize_antialias_default_warning(): img = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8) @@ -641,25 +586,6 @@ def test_resize_antialias_default_warning(): F.resized_crop(img, 0, 0, 10, 10, size=(20, 20), interpolation=NEAREST) -@pytest.mark.parametrize("device", cpu_and_gpu()) -@pytest.mark.parametrize("dt", [torch.float32, torch.float64, torch.float16]) -@pytest.mark.parametrize("size", [[10, 7], [10, 42], [42, 7]]) -@pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC]) -def test_interpolate_antialias_backward(device, dt, size, interpolation): - - if dt == torch.float16 and device == "cpu": - # skip float16 on CPU case - return - - torch.manual_seed(12) - x = (torch.rand(1, 32, 29, 3, dtype=torch.double, device=device).permute(0, 3, 1, 2).requires_grad_(True),) - resize = partial(F.resize, size=size, interpolation=interpolation, antialias=True) - assert torch.autograd.gradcheck(resize, x, eps=1e-8, atol=1e-6, rtol=1e-6, fast_mode=False) - - x = (torch.rand(1, 3, 32, 29, dtype=torch.double, device=device, requires_grad=True),) - assert 
torch.autograd.gradcheck(resize, x, eps=1e-8, atol=1e-6, rtol=1e-6, fast_mode=False) - - def check_functional_vs_PIL_vs_scripted( fn, fn_pil, fn_t, config, device, dtype, channels=3, tol=2.0 + 1e-10, agg_method="max" ): @@ -697,7 +623,7 @@ def check_functional_vs_PIL_vs_scripted( _test_fn_on_batch(batch_tensors, fn, scripted_fn_atol=atol, **config) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"brightness_factor": f} for f in (0.1, 0.5, 1.0, 1.34, 2.5)]) @pytest.mark.parametrize("channels", [1, 3]) @@ -713,7 +639,7 @@ def test_adjust_brightness(device, dtype, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("channels", [1, 3]) def test_invert(device, dtype, channels): @@ -722,7 +648,7 @@ def test_invert(device, dtype, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("config", [{"bits": bits} for bits in range(0, 8)]) @pytest.mark.parametrize("channels", [1, 3]) def test_posterize(device, config, channels): @@ -739,7 +665,7 @@ def test_posterize(device, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("config", [{"threshold": threshold} for threshold in [0, 64, 128, 192, 255]]) @pytest.mark.parametrize("channels", [1, 3]) def test_solarize1(device, config, channels): @@ -756,7 +682,7 @@ def test_solarize1(device, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"threshold": threshold} for threshold in [0.0, 0.25, 0.5, 0.75, 1.0]]) @pytest.mark.parametrize("channels", [1, 3]) @@ -788,7 +714,7 @@ def test_solarize2(device, dtype, config, channels): *[(torch.int64, threshold) for threshold in [0, 2**32, 2**63 - 1]], ], ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_solarize_threshold_within_bound(threshold, dtype, device): make_img = torch.rand if dtype.is_floating_point else partial(torch.randint, 0, torch.iinfo(dtype).max) img = make_img((3, 12, 23), dtype=dtype, device=device) @@ -804,7 +730,7 @@ def test_solarize_threshold_within_bound(threshold, dtype, device): (torch.int64, 2**64), ], ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_solarize_threshold_above_bound(threshold, dtype, device): make_img = torch.rand if dtype.is_floating_point else partial(torch.randint, 0, torch.iinfo(dtype).max) img = make_img((3, 12, 23), dtype=dtype, device=device) @@ -812,7 +738,7 @@ def test_solarize_threshold_above_bound(threshold, dtype, device): F_t.solarize(img, threshold) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"sharpness_factor": f} for f in [0.2, 0.5, 1.0, 1.5, 2.0]]) @pytest.mark.parametrize("channels", [1, 3]) @@ -828,7 +754,7 @@ def test_adjust_sharpness(device, dtype, config, channels): ) 
-@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("channels", [1, 3]) def test_autocontrast(device, dtype, channels): @@ -837,7 +763,7 @@ def test_autocontrast(device, dtype, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("channels", [1, 3]) def test_autocontrast_equal_minmax(device, dtype, channels): @@ -849,7 +775,7 @@ def test_autocontrast_equal_minmax(device, dtype, channels): assert (F.autocontrast(a)[0] == F.autocontrast(a[0])).all() -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("channels", [1, 3]) def test_equalize(device, channels): torch.use_deterministic_algorithms(False) @@ -866,7 +792,7 @@ def test_equalize(device, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"contrast_factor": f} for f in [0.2, 0.5, 1.0, 1.5, 2.0]]) @pytest.mark.parametrize("channels", [1, 3]) @@ -876,7 +802,7 @@ def test_adjust_contrast(device, dtype, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"saturation_factor": f} for f in [0.5, 0.75, 1.0, 1.5, 2.0]]) @pytest.mark.parametrize("channels", [1, 3]) @@ -886,7 +812,7 @@ def test_adjust_saturation(device, dtype, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"hue_factor": f} for f in [-0.45, -0.25, 0.0, 0.25, 0.45]]) @pytest.mark.parametrize("channels", [1, 3]) @@ -896,7 +822,7 @@ def test_adjust_hue(device, dtype, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"gamma": g1, "gain": g2} for g1, g2 in zip([0.8, 1.0, 1.2], [0.7, 1.0, 1.3])]) @pytest.mark.parametrize("channels", [1, 3]) @@ -912,7 +838,7 @@ def test_adjust_gamma(device, dtype, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) @pytest.mark.parametrize("pad", [2, [3], [0, 3], (3, 3), [4, 2, 4, 3]]) @pytest.mark.parametrize( @@ -962,7 +888,7 @@ def test_pad(device, dt, pad, config): _test_fn_on_batch(batch_tensors, F.pad, padding=script_pad, **config) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("mode", [NEAREST, NEAREST_EXACT, BILINEAR, BICUBIC]) def test_resized_crop(device, mode): # test values of F.resized_crop in several cases: @@ -997,7 +923,7 @@ def test_resized_crop(device, mode): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "func, args", [ @@ -1030,7 
+956,7 @@ def test_assert_image_tensor(device, func, args): func(tensor, *args) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_vflip(device): script_vflip = torch.jit.script(F.vflip) @@ -1047,7 +973,7 @@ def test_vflip(device): _test_fn_on_batch(batch_tensors, F.vflip) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_hflip(device): script_hflip = torch.jit.script(F.hflip) @@ -1064,7 +990,7 @@ def test_hflip(device): _test_fn_on_batch(batch_tensors, F.hflip) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "top, left, height, width", [ @@ -1093,7 +1019,7 @@ def test_crop(device, top, left, height, width): _test_fn_on_batch(batch_tensors, F.crop, top=top, left=left, height=height, width=width) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("image_size", ("small", "large")) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) @pytest.mark.parametrize("ksize", [(3, 3), [3, 5], (23, 23)]) @@ -1147,7 +1073,7 @@ def test_gaussian_blur(device, image_size, dt, ksize, sigma, fn): torch.testing.assert_close(out, true_out, rtol=0.0, atol=1.0, msg=f"{ksize}, {sigma}") -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_hsv2rgb(device): scripted_fn = torch.jit.script(F_t._hsv2rgb) shape = (3, 100, 150) @@ -1178,7 +1104,7 @@ def test_hsv2rgb(device): _test_fn_on_batch(batch_tensors, F_t._hsv2rgb) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_rgb2hsv(device): scripted_fn = torch.jit.script(F_t._rgb2hsv) shape = (3, 150, 100) @@ -1217,7 +1143,7 @@ def test_rgb2hsv(device): _test_fn_on_batch(batch_tensors, F_t._rgb2hsv) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("num_output_channels", (3, 1)) def test_rgb_to_grayscale(device, num_output_channels): script_rgb_to_grayscale = torch.jit.script(F.rgb_to_grayscale) @@ -1236,7 +1162,7 @@ def test_rgb_to_grayscale(device, num_output_channels): _test_fn_on_batch(batch_tensors, F.rgb_to_grayscale, num_output_channels=num_output_channels) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_center_crop(device): script_center_crop = torch.jit.script(F.center_crop) @@ -1254,7 +1180,7 @@ def test_center_crop(device): _test_fn_on_batch(batch_tensors, F.center_crop, output_size=[10, 11]) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_five_crop(device): script_five_crop = torch.jit.script(F.five_crop) @@ -1288,7 +1214,7 @@ def test_five_crop(device): assert_equal(transformed_batch, s_transformed_batch) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_ten_crop(device): script_ten_crop = torch.jit.script(F.ten_crop) @@ -1334,7 +1260,7 @@ def test_elastic_transform_asserts(): _ = F.elastic_transform(img_tensor, displacement=torch.rand(1, 2)) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR, BICUBIC]) @pytest.mark.parametrize("dt", [None, torch.float32, 
torch.float64, torch.float16]) @pytest.mark.parametrize( diff --git a/test/test_image.py b/test/test_image.py index 4c210ea7eef..b08dc2026d4 100644 --- a/test/test_image.py +++ b/test/test_image.py @@ -32,6 +32,7 @@ DAMAGED_PNG = os.path.join(IMAGE_ROOT, "damaged_png") ENCODE_JPEG = os.path.join(IMAGE_ROOT, "encode_jpeg") INTERLACED_PNG = os.path.join(IMAGE_ROOT, "interlaced_png") +TOOSMALL_PNG = os.path.join(IMAGE_ROOT, "toosmall_png") IS_WINDOWS = sys.platform in ("win32", "cygwin") PILLOW_VERSION = tuple(int(x) for x in PILLOW_VERSION.split(".")) @@ -193,6 +194,8 @@ def test_decode_png_errors(): decode_png(torch.randint(3, 5, (300,), dtype=torch.uint8)) with pytest.raises(RuntimeError, match="Out of bound read in decode_png"): decode_png(read_file(os.path.join(DAMAGED_PNG, "sigsegv.png"))) + with pytest.raises(RuntimeError, match="Content is too small for png"): + decode_png(read_file(os.path.join(TOOSMALL_PNG, "heapbof.png"))) @pytest.mark.parametrize( diff --git a/test/test_models.py b/test/test_models.py index f6eeb7c28c8..67eb2115c85 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -15,7 +15,7 @@ import torch.fx import torch.nn as nn from _utils_internal import get_relative_path -from common_utils import cpu_and_gpu, freeze_rng_state, map_nested_tensor_object, needs_cuda, set_rng_seed +from common_utils import cpu_and_cuda, freeze_rng_state, map_nested_tensor_object, needs_cuda, set_rng_seed from PIL import Image from torchvision import models, transforms from torchvision.models import get_model_builder, list_models @@ -676,14 +676,14 @@ def vitc_b_16(**kwargs: Any): @pytest.mark.parametrize("model_fn", [vitc_b_16]) -@pytest.mark.parametrize("dev", cpu_and_gpu()) +@pytest.mark.parametrize("dev", cpu_and_cuda()) def test_vitc_models(model_fn, dev): test_classification_model(model_fn, dev) @disable_tf32() # see: https://github.com/pytorch/vision/issues/7618 @pytest.mark.parametrize("model_fn", list_model_fns(models)) -@pytest.mark.parametrize("dev", cpu_and_gpu()) +@pytest.mark.parametrize("dev", cpu_and_cuda()) def test_classification_model(model_fn, dev): set_rng_seed(0) defaults = { @@ -726,7 +726,7 @@ def test_classification_model(model_fn, dev): @pytest.mark.parametrize("model_fn", list_model_fns(models.segmentation)) -@pytest.mark.parametrize("dev", cpu_and_gpu()) +@pytest.mark.parametrize("dev", cpu_and_cuda()) def test_segmentation_model(model_fn, dev): set_rng_seed(0) defaults = { @@ -791,7 +791,7 @@ def check_out(out): @pytest.mark.parametrize("model_fn", list_model_fns(models.detection)) -@pytest.mark.parametrize("dev", cpu_and_gpu()) +@pytest.mark.parametrize("dev", cpu_and_cuda()) def test_detection_model(model_fn, dev): set_rng_seed(0) defaults = { @@ -923,7 +923,7 @@ def test_detection_model_validation(model_fn): @pytest.mark.parametrize("model_fn", list_model_fns(models.video)) -@pytest.mark.parametrize("dev", cpu_and_gpu()) +@pytest.mark.parametrize("dev", cpu_and_cuda()) def test_video_model(model_fn, dev): set_rng_seed(0) # the default input shape is diff --git a/test/test_ops.py b/test/test_ops.py index 463ebb333ff..b993bce65a2 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -10,7 +10,7 @@ import torch import torch.fx import torch.nn.functional as F -from common_utils import assert_equal, cpu_and_gpu, needs_cuda +from common_utils import assert_equal, cpu_and_cuda, needs_cuda from PIL import Image from torch import nn, Tensor from torch.autograd import gradcheck @@ -97,7 +97,7 @@ def forward(self, imgs: Tensor, boxes: List[Tensor]) -> 
Tensor:
 class RoIOpTester(ABC):
     dtype = torch.float64
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     def test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, deterministic=False, **kwargs):
         x_dtype = self.dtype if x_dtype is None else x_dtype
@@ -126,7 +126,7 @@ def test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, determ
         tol = 1e-3 if (x_dtype is torch.half or rois_dtype is torch.half) else 1e-5
         torch.testing.assert_close(gt_y.to(y), y, rtol=tol, atol=tol)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_is_leaf_node(self, device):
         op_obj = self.make_obj(wrap=True).to(device=device)
         graph_node_names = get_graph_node_names(op_obj)
@@ -135,7 +135,7 @@ def test_is_leaf_node(self, device):
         assert len(graph_node_names[0]) == len(graph_node_names[1])
         assert len(graph_node_names[0]) == 1 + op_obj.n_inputs
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_torch_fx_trace(self, device, x_dtype=torch.float, rois_dtype=torch.float):
         op_obj = self.make_obj().to(device=device)
         graph_module = torch.fx.symbolic_trace(op_obj)
@@ -155,7 +155,7 @@ def test_torch_fx_trace(self, device, x_dtype=torch.float, rois_dtype=torch.floa
         torch.testing.assert_close(output_gt, output_fx, rtol=tol, atol=tol)
 
     @pytest.mark.parametrize("seed", range(10))
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     def test_backward(self, seed, device, contiguous, deterministic=False):
         torch.random.manual_seed(seed)
@@ -418,7 +418,7 @@ def test_boxes_shape(self):
         self._helper_boxes_shape(ops.roi_align)
 
     @pytest.mark.parametrize("aligned", (True, False))
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("deterministic", (True, False))
     def test_forward(self, device, contiguous, deterministic, aligned, x_dtype=None, rois_dtype=None):
@@ -450,7 +450,7 @@ def test_autocast(self, aligned, deterministic, x_dtype, rois_dtype):
         )
 
     @pytest.mark.parametrize("seed", range(10))
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("deterministic", (True, False))
     def test_backward(self, seed, device, contiguous, deterministic):
@@ -612,7 +612,7 @@ def test_msroialign_repr(self):
         )
         assert repr(t) == expected_string
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_is_leaf_node(self, device):
         op_obj = self.make_obj(wrap=True).to(device=device)
         graph_node_names = get_graph_node_names(op_obj)
@@ -885,7 +885,7 @@ def make_obj(self, in_channels=6, out_channels=2, kernel_size=(3, 2), groups=2,
         )
         return DeformConvModuleWrapper(obj) if wrap else obj
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_is_leaf_node(self, device):
         op_obj = self.make_obj(wrap=True).to(device=device)
         graph_node_names = get_graph_node_names(op_obj)
@@ -894,7 +894,7 @@ def test_is_leaf_node(self, device):
         assert len(graph_node_names[0]) == len(graph_node_names[1])
         assert len(graph_node_names[0]) == 1 + op_obj.n_inputs
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("batch_sz", (0, 33))
     def test_forward(self, device, contiguous, batch_sz, dtype=None):
@@ -946,7 +946,7 @@ def test_wrong_sizes(self):
             wrong_mask = torch.rand_like(mask[:, :2])
             layer(x, offset, wrong_mask)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("batch_sz", (0, 33))
     def test_backward(self, device, contiguous, batch_sz):
@@ -1411,7 +1411,7 @@ def assert_empty_loss(iou_fn, dtype, device):
 
 class TestGeneralizedBoxIouLoss:
     # We refer to original test: https://github.com/facebookresearch/fvcore/blob/main/tests/test_giou_loss.py
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_giou_loss(self, dtype, device):
         box1, box2, box3, box4, box1s, box2s = get_boxes(dtype, device)
@@ -1439,7 +1439,7 @@ def test_giou_loss(self, dtype, device):
         with pytest.raises(ValueError, match="Invalid"):
             ops.generalized_box_iou_loss(box1s, box2s, reduction="xyz")
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_empty_inputs(self, dtype, device):
         assert_empty_loss(ops.generalized_box_iou_loss, dtype, device)
@@ -1447,7 +1447,7 @@ def test_empty_inputs(self, dtype, device):
 
 
 class TestCompleteBoxIouLoss:
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_ciou_loss(self, dtype, device):
         box1, box2, box3, box4, box1s, box2s = get_boxes(dtype, device)
@@ -1461,14 +1461,14 @@ def test_ciou_loss(self, dtype, device):
         with pytest.raises(ValueError, match="Invalid"):
             ops.complete_box_iou_loss(box1s, box2s, reduction="xyz")
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_empty_inputs(self, dtype, device):
         assert_empty_loss(ops.complete_box_iou_loss, dtype, device)
 
 
 class TestDistanceBoxIouLoss:
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_distance_iou_loss(self, dtype, device):
         box1, box2, box3, box4, box1s, box2s = get_boxes(dtype, device)
@@ -1483,7 +1483,7 @@ def test_distance_iou_loss(self, dtype, device):
         with pytest.raises(ValueError, match="Invalid"):
             ops.distance_box_iou_loss(box1s, box2s, reduction="xyz")
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_empty_distance_iou_inputs(self, dtype, device):
         assert_empty_loss(ops.distance_box_iou_loss, dtype, device)
@@ -1528,7 +1528,7 @@ def generate_tensor_with_range_type(shape, range_type, **kwargs):
 
     @pytest.mark.parametrize("alpha", [-1.0, 0.0, 0.58, 1.0])
     @pytest.mark.parametrize("gamma", [0, 2])
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     @pytest.mark.parametrize("seed", [0, 1])
     def test_correct_ratio(self, alpha, gamma, device, dtype, seed):
@@ -1557,7 +1557,7 @@ def test_correct_ratio(self, alpha, gamma, device, dtype, seed):
         torch.testing.assert_close(correct_ratio, loss_ratio, atol=tol, rtol=tol)
 
     @pytest.mark.parametrize("reduction", ["mean", "sum"])
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     @pytest.mark.parametrize("seed", [2, 3])
     def test_equal_ce_loss(self, reduction, device, dtype, seed):
@@ -1584,7 +1584,7 @@ def test_equal_ce_loss(self, reduction, device, dtype, seed):
     @pytest.mark.parametrize("alpha", [-1.0, 0.0, 0.58, 1.0])
     @pytest.mark.parametrize("gamma", [0, 2])
     @pytest.mark.parametrize("reduction", ["none", "mean", "sum"])
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     @pytest.mark.parametrize("seed", [4, 5])
     def test_jit(self, alpha, gamma, reduction, device, dtype, seed):
@@ -1600,7 +1600,7 @@ def test_jit(self, alpha, gamma, reduction, device, dtype, seed):
         torch.testing.assert_close(focal_loss, scripted_focal_loss, rtol=tol, atol=tol)
 
     # Raise ValueError for anonymous reduction mode
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_reduction_mode(self, device, dtype, reduction="xyz"):
         if device == "cpu" and dtype is torch.half:
diff --git a/test/test_prototype_datapoints.py b/test/test_prototype_datapoints.py
deleted file mode 100644
index 04e3cd67f96..00000000000
--- a/test/test_prototype_datapoints.py
+++ /dev/null
@@ -1,133 +0,0 @@
-import pytest
-import torch
-
-from torchvision.prototype import datapoints as proto_datapoints
-
-
-@pytest.mark.parametrize(
-    ("data", "input_requires_grad", "expected_requires_grad"),
-    [
-        ([0.0], None, False),
-        ([0.0], False, False),
-        ([0.0], True, True),
-        (torch.tensor([0.0], requires_grad=False), None, False),
-        (torch.tensor([0.0], requires_grad=False), False, False),
-        (torch.tensor([0.0], requires_grad=False), True, True),
-        (torch.tensor([0.0], requires_grad=True), None, True),
-        (torch.tensor([0.0], requires_grad=True), False, False),
-        (torch.tensor([0.0], requires_grad=True), True, True),
-    ],
-)
-def test_new_requires_grad(data, input_requires_grad, expected_requires_grad):
-    datapoint = proto_datapoints.Label(data, requires_grad=input_requires_grad)
-    assert datapoint.requires_grad is expected_requires_grad
-
-
-def test_isinstance():
-    assert isinstance(
-        proto_datapoints.Label([0, 1, 0], categories=["foo", "bar"]),
-        torch.Tensor,
-    )
-
-
-def test_wrapping_no_copy():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    assert label.data_ptr() == tensor.data_ptr()
-
-
-def test_to_wrapping():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    label_to = label.to(torch.int32)
-
-    assert type(label_to) is proto_datapoints.Label
-    assert label_to.dtype is torch.int32
-    assert label_to.categories is label.categories
-
-
-def test_to_datapoint_reference():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"]).to(torch.int32)
-
-    tensor_to = tensor.to(label)
-
-    assert type(tensor_to) is torch.Tensor
-    assert tensor_to.dtype is torch.int32
-
-
-def test_clone_wrapping():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    label_clone = label.clone()
-
-    assert type(label_clone) is proto_datapoints.Label
-    assert label_clone.data_ptr() != label.data_ptr()
-    assert label_clone.categories is label.categories
-
-
-def test_requires_grad__wrapping():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.float32)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    assert not label.requires_grad
-
-    label_requires_grad = label.requires_grad_(True)
-
-    assert type(label_requires_grad) is proto_datapoints.Label
-    assert label.requires_grad
-    assert label_requires_grad.requires_grad
-
-
-def test_other_op_no_wrapping():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    # any operation besides .to() and .clone() will do here
-    output = label * 2
-
-    assert type(output) is torch.Tensor
-
-
-@pytest.mark.parametrize(
-    "op",
-    [
-        lambda t: t.numpy(),
-        lambda t: t.tolist(),
-        lambda t: t.max(dim=-1),
-    ],
-)
-def test_no_tensor_output_op_no_wrapping(op):
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    output = op(label)
-
-    assert type(output) is not proto_datapoints.Label
-
-
-def test_inplace_op_no_wrapping():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    output = label.add_(0)
-
-    assert type(output) is torch.Tensor
-    assert type(label) is proto_datapoints.Label
-
-
-def test_wrap_like():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    # any operation besides .to() and .clone() will do here
-    output = label * 2
-
-    label_new = proto_datapoints.Label.wrap_like(label, output)
-
-    assert type(label_new) is proto_datapoints.Label
-    assert label_new.data_ptr() == output.data_ptr()
-    assert label_new.categories is label.categories
diff --git a/test/test_prototype_models.py b/test/test_prototype_models.py
index 6d9f22c1543..d32df68f1f4 100644
--- a/test/test_prototype_models.py
+++ b/test/test_prototype_models.py
@@ -1,13 +1,13 @@
 import pytest
 import test_models as TM
 import torch
-from common_utils import cpu_and_gpu, set_rng_seed
+from common_utils import cpu_and_cuda, set_rng_seed
 from torchvision.prototype import models
 
 
 @pytest.mark.parametrize("model_fn", (models.depth.stereo.raft_stereo_base,))
 @pytest.mark.parametrize("model_mode", ("standard", "scripted"))
-@pytest.mark.parametrize("dev", cpu_and_gpu())
+@pytest.mark.parametrize("dev", cpu_and_cuda())
 def test_raft_stereo(model_fn, model_mode, dev):
     # A simple test to make sure the model can do forward pass and jit scriptable
     set_rng_seed(0)
@@ -40,7 +40,7 @@ def test_raft_stereo(model_fn, model_mode, dev):
 
 @pytest.mark.parametrize("model_fn", (models.depth.stereo.crestereo_base,))
 @pytest.mark.parametrize("model_mode", ("standard", "scripted"))
-@pytest.mark.parametrize("dev", cpu_and_gpu())
+@pytest.mark.parametrize("dev", cpu_and_cuda())
 def test_crestereo(model_fn, model_mode, dev):
     set_rng_seed(0)
diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py
index 255c3b5c32f..c574979e22c 100644
--- a/test/test_prototype_transforms.py
+++ b/test/test_prototype_transforms.py
@@ -216,7 +216,7 @@ def test__get_params(self, mocker):
         flat_inputs = [
             make_image(size=spatial_size, color_space="RGB"),
-            make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=batch_shape),
+            make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=batch_shape),
         ]
 
         params = transform._get_params(flat_inputs)
@@ -312,9 +312,9 @@ def test__transform_culling(self, mocker):
         )
 
         bounding_boxes = make_bounding_box(
-            format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,)
+            format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
         )
-        masks = make_detection_mask(size=spatial_size, extra_dims=(batch_size,))
+        masks = make_detection_mask(size=spatial_size, batch_dims=(batch_size,))
         labels = make_label(extra_dims=(batch_size,))
 
         transform = transforms.FixedSizeCrop((-1, -1))
@@ -350,7 +350,7 @@ def test__transform_bounding_box_clamping(self, mocker):
         )
 
         bounding_box = make_bounding_box(
-            format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,)
+            format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
         )
         mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_box")
@@ -496,7 +496,7 @@ def make_datapoints():
     pil_image = to_image_pil(make_image(size=size, color_space="RGB"))
     target = {
-        "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+        "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
         "labels": make_label(extra_dims=(num_objects,), categories=80),
         "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
     }
@@ -505,7 +505,7 @@ def make_datapoints():
     tensor_image = torch.Tensor(make_image(size=size, color_space="RGB"))
     target = {
-        "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+        "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
         "labels": make_label(extra_dims=(num_objects,), categories=80),
         "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
     }
@@ -514,7 +514,7 @@ def make_datapoints():
     datapoint_image = make_image(size=size, color_space="RGB")
     target = {
-        "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+        "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
         "labels": make_label(extra_dims=(num_objects,), categories=80),
         "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
     }
diff --git a/test/test_transforms.py b/test/test_transforms.py
index 41075c6514a..7581bf33220 100644
--- a/test/test_transforms.py
+++ b/test/test_transforms.py
@@ -952,33 +952,6 @@ def test_adjust_contrast():
     torch.testing.assert_close(y_np, y_ans)
 
 
-@pytest.mark.skipif(Image.__version__ >= "7", reason="Temporarily disabled")
-def test_adjust_saturation():
-    x_shape = [2, 2, 3]
-    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
-    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
-    x_pil = Image.fromarray(x_np, mode="RGB")
-
-    # test 0
-    y_pil = F.adjust_saturation(x_pil, 1)
-    y_np = np.array(y_pil)
-    torch.testing.assert_close(y_np, x_np)
-
-    # test 1
-    y_pil = F.adjust_saturation(x_pil, 0.5)
-    y_np = np.array(y_pil)
-    y_ans = [2, 4, 8, 87, 128, 173, 39, 25, 138, 133, 215, 88]
-    y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
-    torch.testing.assert_close(y_np, y_ans)
-
-    # test 2
-    y_pil = F.adjust_saturation(x_pil, 2)
-    y_np = np.array(y_pil)
-    y_ans = [0, 6, 22, 0, 149, 255, 32, 0, 255, 4, 255, 0]
-    y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
-    torch.testing.assert_close(y_np, y_ans)
-
-
 def test_adjust_hue():
     x_shape = [2, 2, 3]
     x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
index 077a12af490..e2ab5673f1e 100644
--- a/test/test_transforms_tensor.py
+++ b/test/test_transforms_tensor.py
@@ -12,7 +12,7 @@
     _create_data,
     _create_data_batch,
     assert_equal,
-    cpu_and_gpu,
+    cpu_and_cuda,
     float_dtypes,
     get_tmp_dir,
     int_dtypes,
@@ -105,7 +105,7 @@ def _test_fn_save_load(fn, tmpdir):
         _ = torch.jit.load(p)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "func,method,fn_kwargs,match_kwargs",
     [
@@ -130,7 +130,7 @@ def test_random(func, method, device, channels, fn_kwargs, match_kwargs):
 
 
 @pytest.mark.parametrize("seed", range(10))
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("channels", [1, 3])
 class TestColorJitter:
     @pytest.fixture(autouse=True)
@@ -206,7 +206,7 @@ def test_color_jitter_all(self, device, channels):
         )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("m", ["constant", "edge", "reflect", "symmetric"])
 @pytest.mark.parametrize("mul", [1, -1])
 def test_pad(m, mul, device):
@@ -229,7 +229,7 @@ def test_pad(m, mul, device):
     _test_op(F.pad, T.Pad, device=device, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_crop(device):
     fn_kwargs = {"top": 2, "left": 3, "height": 4, "width": 5}
     # Test transforms.RandomCrop with size and padding as tuple
@@ -257,7 +257,7 @@ def test_crop(device):
     _test_functional_op(F.crop, fn_kwargs=fn_kwargs, device=device)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "padding_config",
     [
@@ -283,7 +283,7 @@ def test_random_crop_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_center_crop(device, tmpdir):
     fn_kwargs = {"output_size": (4, 5)}
     meth_kwargs = {"size": (4, 5)}
@@ -313,7 +313,7 @@ def test_center_crop_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "fn, method, out_length",
     [
@@ -380,7 +380,7 @@ def test_resize_int(self, size):
         assert y.shape[1] == size
         assert y.shape[2] == int(size * 46 / 32)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64])
     @pytest.mark.parametrize("size", [[32], [32, 32], (32, 32), [34, 35]])
     @pytest.mark.parametrize("max_size", [None, 35, 1000])
@@ -404,7 +404,7 @@ def test_resize_save_load(self, tmpdir):
         fn = T.Resize(size=[32], antialias=True)
         _test_fn_save_load(fn, tmpdir)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("scale", [(0.7, 1.2), [0.7, 1.2]])
     @pytest.mark.parametrize("ratio", [(0.75, 1.333), [0.75, 1.333]])
     @pytest.mark.parametrize("size", [(32,), [44], [32], [32, 32], (32, 32), [44, 55]])
@@ -460,42 +460,42 @@ def test_random_affine_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("shear", [15, 10.0, (5.0, 10.0), [-15, 15], [-10.0, 10.0, -11.0, 11.0]])
 def test_random_affine_shear(device, interpolation, shear):
     _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, shear=shear)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("scale", [(0.7, 1.2), [0.7, 1.2]])
 def test_random_affine_scale(device, interpolation, scale):
     _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, scale=scale)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("translate", [(0.1, 0.2), [0.2, 0.1]])
 def test_random_affine_translate(device, interpolation, translate):
     _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, translate=translate)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("degrees", [45, 35.0, (-45, 45), [-90.0, 90.0]])
 def test_random_affine_degrees(device, interpolation, degrees):
     _test_random_affine_helper(device, degrees=degrees, interpolation=interpolation)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
 def test_random_affine_fill(device, interpolation, fill):
     _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, fill=fill)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("center", [(0, 0), [10, 10], None, (56, 44)])
 @pytest.mark.parametrize("expand", [True, False])
 @pytest.mark.parametrize("degrees", [45, 35.0, (-45, 45), [-90.0, 90.0]])
@@ -517,7 +517,7 @@ def test_random_rotate_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("distortion_scale", np.linspace(0.1, 1.0, num=20))
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
@@ -537,7 +537,7 @@ def test_random_perspective_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "Klass, meth_kwargs",
     [(T.Grayscale, {"num_output_channels": 1}), (T.Grayscale, {"num_output_channels": 3}), (T.RandomGrayscale, {})],
@@ -547,7 +547,7 @@ def test_to_grayscale(device, Klass, meth_kwargs):
     _test_class_op(Klass, meth_kwargs=meth_kwargs, test_exact_match=False, device=device, tol=tol, agg_method="max")
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("in_dtype", int_dtypes() + float_dtypes())
 @pytest.mark.parametrize("out_dtype", int_dtypes() + float_dtypes())
 def test_convert_image_dtype(device, in_dtype, out_dtype):
@@ -578,7 +578,7 @@ def test_convert_image_dtype_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("policy", [policy for policy in T.AutoAugmentPolicy])
 @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
 def test_autoaugment(device, policy, fill):
@@ -592,7 +592,7 @@ def test_autoaugment(device, policy, fill):
         _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("num_ops", [1, 2, 3])
 @pytest.mark.parametrize("magnitude", [7, 9, 11])
 @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
@@ -607,7 +607,7 @@ def test_randaugment(device, num_ops, magnitude, fill):
         _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
 def test_trivialaugmentwide(device, fill):
     tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device)
@@ -620,7 +620,7 @@ def test_trivialaugmentwide(device, fill):
         _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
 def test_augmix(device, fill):
     tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device)
@@ -686,7 +686,7 @@ def shear(pil_img, level, mode, resample):
     _assert_approx_equal_tensor_to_pil(out, expected_out)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "config",
     [
@@ -724,7 +724,7 @@ def test_random_erasing_with_invalid_data():
         random_erasing(img)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_normalize(device, tmpdir):
     fn = T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
     tensor, _ = _create_data(26, 34, device=device)
@@ -743,7 +743,7 @@ def test_normalize(device, tmpdir):
     scripted_fn.save(os.path.join(tmpdir, "t_norm.pt"))
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_linear_transformation(device, tmpdir):
     c, h, w = 3, 24, 32
 
@@ -769,7 +769,7 @@ def test_linear_transformation(device, tmpdir):
     scripted_fn.save(os.path.join(tmpdir, "t_norm.pt"))
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_compose(device):
     tensor, _ = _create_data(26, 34, device=device)
     tensor = tensor.to(dtype=torch.float32) / 255.0
@@ -797,7 +797,7 @@ def test_compose(device):
         torch.jit.script(t)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_random_apply(device):
     tensor, _ = _create_data(26, 34, device=device)
     tensor = tensor.to(dtype=torch.float32) / 255.0
@@ -839,7 +839,7 @@ def test_random_apply(device):
         torch.jit.script(transforms)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "meth_kwargs",
     [
@@ -877,7 +877,7 @@ def test_gaussian_blur(device, channels, meth_kwargs):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "fill",
     [
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
index bfb403c7abe..755a7b0350c 100644
--- a/test/test_transforms_v2.py
+++ b/test/test_transforms_v2.py
@@ -16,7 +16,7 @@ from common_utils import (
     assert_equal,
     assert_run_python_script,
-    cpu_and_gpu,
+    cpu_and_cuda,
     make_bounding_box,
     make_bounding_boxes,
     make_detection_mask,
@@ -29,7 +29,7 @@
 from torch.utils._pytree import tree_flatten, tree_unflatten
 from torchvision import datapoints
 from torchvision.ops.boxes import box_iou
-from torchvision.transforms.functional import InterpolationMode, pil_to_tensor, to_pil_image
+from torchvision.transforms.functional import InterpolationMode, to_pil_image
 from torchvision.transforms.v2 import functional as F
 from torchvision.transforms.v2.utils import check_type, is_simple_tensor, query_chw
 
@@ -199,7 +199,7 @@ class TestSmoke:
             next(make_vanilla_tensor_images()),
         ],
     )
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_common(self, transform, adapter, container_type, image_or_video, device):
         spatial_size = F.get_spatial_size(image_or_video)
         input = dict(
@@ -208,13 +208,13 @@ def test_common(self, transform, adapter, container_type, image_or_video, device
             video_datapoint=make_video(size=spatial_size),
             image_pil=next(make_pil_images(sizes=[spatial_size], color_spaces=["RGB"])),
             bounding_box_xyxy=make_bounding_box(
-                format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(3,)
+                format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(3,)
             ),
             bounding_box_xywh=make_bounding_box(
-                format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, extra_dims=(4,)
+                format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, batch_dims=(4,)
             ),
             bounding_box_cxcywh=make_bounding_box(
-                format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, extra_dims=(5,)
+                format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, batch_dims=(5,)
             ),
             bounding_box_degenerate_xyxy=datapoints.BoundingBox(
                 [
@@ -315,7 +315,7 @@ def test_common(self, transform, adapter, container_type, image_or_video, device
             ],
             dtypes=[torch.uint8],
             extra_dims=[(), (4,)],
-            **(dict(num_frames=["random"]) if fn is make_videos else dict()),
+            **(dict(num_frames=[3]) if fn is make_videos else dict()),
         )
         for fn in [
             make_images,
@@ -463,112 +463,6 @@ def was_applied(output, inpt):
         assert transform.was_applied(output, input)
 
 
-@pytest.mark.parametrize("p", [0.0, 1.0])
-class TestRandomHorizontalFlip:
-    def input_expected_image_tensor(self, p, dtype=torch.float32):
-        input = torch.tensor([[[0, 1], [0, 1]], [[1, 0], [1, 0]]], dtype=dtype)
-        expected = torch.tensor([[[1, 0], [1, 0]], [[0, 1], [0, 1]]], dtype=dtype)
-
-        return input, expected if p == 1 else input
-
-    def test_simple_tensor(self, p):
-        input, expected = self.input_expected_image_tensor(p)
-        transform = transforms.RandomHorizontalFlip(p=p)
-
-        actual = transform(input)
-
-        assert_equal(expected, actual)
-
-    def test_pil_image(self, p):
-        input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8)
-        transform = transforms.RandomHorizontalFlip(p=p)
-
-        actual = transform(to_pil_image(input))
-
-        assert_equal(expected, pil_to_tensor(actual))
-
-    def test_datapoints_image(self, p):
-        input, expected = self.input_expected_image_tensor(p)
-        transform = transforms.RandomHorizontalFlip(p=p)
-
-        actual = transform(datapoints.Image(input))
-
-        assert_equal(datapoints.Image(expected), actual)
-
-    def test_datapoints_mask(self, p):
-        input, expected = self.input_expected_image_tensor(p)
-        transform = transforms.RandomHorizontalFlip(p=p)
-
-        actual = transform(datapoints.Mask(input))
-
-        assert_equal(datapoints.Mask(expected), actual)
-
-    def test_datapoints_bounding_box(self, p):
-        input = datapoints.BoundingBox([0, 0, 5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10))
-        transform = transforms.RandomHorizontalFlip(p=p)
-
-        actual = transform(input)
-
-        expected_image_tensor = torch.tensor([5, 0, 10, 5]) if p == 1.0 else input
-        expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor)
-        assert_equal(expected, actual)
-        assert actual.format == expected.format
-        assert actual.spatial_size == expected.spatial_size
-
-
-@pytest.mark.parametrize("p", [0.0, 1.0])
-class TestRandomVerticalFlip:
-    def input_expected_image_tensor(self, p, dtype=torch.float32):
-        input = torch.tensor([[[1, 1], [0, 0]], [[1, 1], [0, 0]]], dtype=dtype)
-        expected = torch.tensor([[[0, 0], [1, 1]], [[0, 0], [1, 1]]], dtype=dtype)
-
-        return input, expected if p == 1 else input
-
-    def test_simple_tensor(self, p):
-        input, expected = self.input_expected_image_tensor(p)
-        transform = transforms.RandomVerticalFlip(p=p)
-
-        actual = transform(input)
-
-        assert_equal(expected, actual)
-
-    def test_pil_image(self, p):
-        input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8)
-        transform = transforms.RandomVerticalFlip(p=p)
-
-        actual = transform(to_pil_image(input))
-
-        assert_equal(expected, pil_to_tensor(actual))
-
-    def test_datapoints_image(self, p):
-        input, expected = self.input_expected_image_tensor(p)
-        transform = transforms.RandomVerticalFlip(p=p)
-
-        actual = transform(datapoints.Image(input))
-
-        assert_equal(datapoints.Image(expected), actual)
-
-    def test_datapoints_mask(self, p):
-        input, expected = self.input_expected_image_tensor(p)
-        transform = transforms.RandomVerticalFlip(p=p)
-
-        actual = transform(datapoints.Mask(input))
-
-        assert_equal(datapoints.Mask(expected), actual)
-
-    def test_datapoints_bounding_box(self, p):
-        input = datapoints.BoundingBox([0, 0, 5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10))
-        transform = transforms.RandomVerticalFlip(p=p)
-
-        actual = transform(input)
-
-        expected_image_tensor = torch.tensor([0, 5, 5, 10]) if p == 1.0 else input
-        expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor)
-        assert_equal(expected, actual)
-        assert actual.format == expected.format
-        assert actual.spatial_size == expected.spatial_size
-
-
 class TestPad:
     def test_assertions(self):
         with pytest.raises(TypeError, match="Got inappropriate padding arg"):
@@ -704,204 +598,6 @@ def test__transform_image_mask(self, fill, mocker):
         fn.assert_has_calls(calls)
 
 
-class TestRandomRotation:
-    def test_assertions(self):
-        with pytest.raises(ValueError, match="is a single number, it must be positive"):
-            transforms.RandomRotation(-0.7)
-
-        for d in [[-0.7], [-0.7, 0, 0.7]]:
-            with pytest.raises(ValueError, match="degrees should be a sequence of length 2"):
-                transforms.RandomRotation(d)
-
-        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
-            transforms.RandomRotation(12, fill="abc")
-
-        with pytest.raises(TypeError, match="center should be a sequence of length"):
-            transforms.RandomRotation(12, center=12)
-
-        with pytest.raises(ValueError, match="center should be a sequence of length"):
-            transforms.RandomRotation(12, center=[1, 2, 3])
-
-    def test__get_params(self):
-        angle_bound = 34
-        transform = transforms.RandomRotation(angle_bound)
-
-        params = transform._get_params(None)
-        assert -angle_bound <= params["angle"] <= angle_bound
-
-        angle_bounds = [12, 34]
-        transform = transforms.RandomRotation(angle_bounds)
-
-        params = transform._get_params(None)
-        assert angle_bounds[0] <= params["angle"] <= angle_bounds[1]
-
-    @pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)])
-    @pytest.mark.parametrize("expand", [False, True])
-    @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
-    @pytest.mark.parametrize("center", [None, [2.0, 3.0]])
-    def test__transform(self, degrees, expand, fill, center, mocker):
-        interpolation = InterpolationMode.BILINEAR
-        transform = transforms.RandomRotation(
-            degrees, interpolation=interpolation, expand=expand, fill=fill, center=center
-        )
-
-        if isinstance(degrees, (tuple, list)):
-            assert transform.degrees == [float(degrees[0]), float(degrees[1])]
-        else:
-            assert transform.degrees == [float(-degrees), float(degrees)]
-
-        fn = mocker.patch("torchvision.transforms.v2.functional.rotate")
-        inpt = mocker.MagicMock(spec=datapoints.Image)
-        # vfdev-5, Feature Request: let's store params as Transform attribute
-        # This could be also helpful for users
-        # Otherwise, we can mock transform._get_params
-        torch.manual_seed(12)
-        _ = transform(inpt)
-        torch.manual_seed(12)
-        params = transform._get_params(inpt)
-
-        fill = transforms._utils._convert_fill_arg(fill)
-        fn.assert_called_once_with(inpt, **params, interpolation=interpolation, expand=expand, fill=fill, center=center)
-
-    @pytest.mark.parametrize("angle", [34, -87])
-    @pytest.mark.parametrize("expand", [False, True])
-    def test_boundingbox_spatial_size(self, angle, expand):
-        # Specific test for BoundingBox.rotate
-        bbox = datapoints.BoundingBox(
-            torch.tensor([1, 2, 3, 4]), format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(32, 32)
-        )
-        img = datapoints.Image(torch.rand(1, 3, 32, 32))
-
-        out_img = img.rotate(angle, expand=expand)
-        out_bbox = bbox.rotate(angle, expand=expand)
-
-        assert out_img.spatial_size == out_bbox.spatial_size
-
-
-class TestRandomAffine:
-    def test_assertions(self):
-        with pytest.raises(ValueError, match="is a single number, it must be positive"):
-            transforms.RandomAffine(-0.7)
-
-        for d in [[-0.7], [-0.7, 0, 0.7]]:
-            with pytest.raises(ValueError, match="degrees should be a sequence of length 2"):
-                transforms.RandomAffine(d)
-
-        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
-            transforms.RandomAffine(12, fill="abc")
-
-        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
-            transforms.RandomAffine(12, fill="abc")
-
-        for kwargs in [
-            {"center": 12},
-            {"translate": 12},
-            {"scale": 12},
-        ]:
-            with pytest.raises(TypeError, match="should be a sequence of length"):
-                transforms.RandomAffine(12, **kwargs)
-
-        for kwargs in [{"center": [1, 2, 3]}, {"translate": [1, 2, 3]}, {"scale": [1, 2, 3]}]:
-            with pytest.raises(ValueError, match="should be a sequence of length"):
-                transforms.RandomAffine(12, **kwargs)
-
-        with pytest.raises(ValueError, match="translation values should be between 0 and 1"):
-            transforms.RandomAffine(12, translate=[-1.0, 2.0])
-
-        with pytest.raises(ValueError, match="scale values should be positive"):
-            transforms.RandomAffine(12, scale=[-1.0, 2.0])
-
-        with pytest.raises(ValueError, match="is a single number, it must be positive"):
-            transforms.RandomAffine(12, shear=-10)
-
-        for s in [[-0.7], [-0.7, 0, 0.7]]:
-            with pytest.raises(ValueError, match="shear should be a sequence of length 2"):
-                transforms.RandomAffine(12, shear=s)
-
-    @pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)])
-    @pytest.mark.parametrize("translate", [None, [0.1, 0.2]])
-    @pytest.mark.parametrize("scale", [None, [0.7, 1.2]])
-    @pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]])
-    def test__get_params(self, degrees, translate, scale, shear, mocker):
-        image = mocker.MagicMock(spec=datapoints.Image)
-        image.num_channels = 3
-        image.spatial_size = (24, 32)
-        h, w = image.spatial_size
-
-        transform = transforms.RandomAffine(degrees, translate=translate, scale=scale, shear=shear)
-        params = transform._get_params([image])
-
-        if not isinstance(degrees, (list, tuple)):
-            assert -degrees <= params["angle"] <= degrees
-        else:
-            assert degrees[0] <= params["angle"] <= degrees[1]
-
-        if translate is not None:
-            w_max = int(round(translate[0] * w))
-            h_max = int(round(translate[1] * h))
-            assert -w_max <= params["translate"][0] <= w_max
-            assert -h_max <= params["translate"][1] <= h_max
-        else:
-            assert params["translate"] == (0, 0)
-
-        if scale is not None:
-            assert scale[0] <= params["scale"] <= scale[1]
-        else:
-            assert params["scale"] == 1.0
-
-        if shear is not None:
-            if isinstance(shear, float):
-                assert -shear <= params["shear"][0] <= shear
-                assert params["shear"][1] == 0.0
-            elif len(shear) == 2:
-                assert shear[0] <= params["shear"][0] <= shear[1]
-                assert params["shear"][1] == 0.0
-            else:
-                assert shear[0] <= params["shear"][0] <= shear[1]
-                assert shear[2] <= params["shear"][1] <= shear[3]
-        else:
-            assert params["shear"] == (0, 0)
-
-    @pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)])
-    @pytest.mark.parametrize("translate", [None, [0.1, 0.2]])
-    @pytest.mark.parametrize("scale", [None, [0.7, 1.2]])
-    @pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]])
-    @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
-    @pytest.mark.parametrize("center", [None, [2.0, 3.0]])
-    def test__transform(self, degrees, translate, scale, shear, fill, center, mocker):
-        interpolation = InterpolationMode.BILINEAR
-        transform = transforms.RandomAffine(
-            degrees,
-            translate=translate,
-            scale=scale,
-            shear=shear,
-            interpolation=interpolation,
-            fill=fill,
-            center=center,
-        )
-
-        if isinstance(degrees, (tuple, list)):
-            assert transform.degrees == [float(degrees[0]), float(degrees[1])]
-        else:
-            assert transform.degrees == [float(-degrees), float(degrees)]
-
-        fn = mocker.patch("torchvision.transforms.v2.functional.affine")
-        inpt = mocker.MagicMock(spec=datapoints.Image)
-        inpt.num_channels = 3
-        inpt.spatial_size = (24, 32)
-
-        # vfdev-5, Feature Request: let's store params as Transform attribute
-        # This could be also helpful for users
-        # Otherwise, we can mock transform._get_params
-        torch.manual_seed(12)
-        _ = transform(inpt)
-        torch.manual_seed(12)
-        params = transform._get_params([inpt])
-
-        fill = transforms._utils._convert_fill_arg(fill)
-        fn.assert_called_once_with(inpt, **params, interpolation=interpolation, fill=fill, center=center)
-
-
 class TestRandomCrop:
     def test_assertions(self):
         with pytest.raises(ValueError, match="Please provide only two dimensions"):
@@ -1421,7 +1117,7 @@ def test_assertions(self):
 
 
 class TestRandomIoUCrop:
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("options", [[0.5, 0.9], [2.0]])
     def test__get_params(self, device, options, mocker):
         image = mocker.MagicMock(spec=datapoints.Image)
@@ -1485,7 +1181,7 @@ def test__transform(self, mocker):
         transform = transforms.RandomIoUCrop()
 
         image = datapoints.Image(torch.rand(3, 32, 24))
-        bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), extra_dims=(6,))
+        bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), batch_dims=(6,))
         masks = make_detection_mask((32, 24), num_objects=6)
 
         sample = [image, bboxes, masks]
@@ -1768,8 +1464,6 @@ def test_antialias_warning():
     tensor_video = torch.randint(0, 256, size=(2, 3, 10, 10), dtype=torch.uint8)
 
     match = "The default value of the antialias parameter"
-    with pytest.warns(UserWarning, match=match):
-        transforms.Resize((20, 20))(tensor_img)
     with pytest.warns(UserWarning, match=match):
         transforms.RandomResizedCrop((20, 20))(tensor_img)
     with pytest.warns(UserWarning, match=match):
@@ -1779,18 +1473,6 @@
     with pytest.warns(UserWarning, match=match):
         transforms.RandomResize(10, 20)(tensor_img)
 
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize(tensor_img, (20, 20))
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize_image_tensor(tensor_img, (20, 20))
-
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize(tensor_video, (20, 20))
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize_video(tensor_video, (20, 20))
-
-    with pytest.warns(UserWarning, match=match):
-        datapoints.Image(tensor_img).resize((20, 20))
     with pytest.warns(UserWarning, match=match):
         datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20))
 
@@ -1801,27 +1483,17 @@
     with warnings.catch_warnings():
         warnings.simplefilter("error")
-        transforms.Resize((20, 20))(pil_img)
         transforms.RandomResizedCrop((20, 20))(pil_img)
         transforms.ScaleJitter((20, 20))(pil_img)
         transforms.RandomShortestSize((20, 20))(pil_img)
         transforms.RandomResize(10, 20)(pil_img)
-        transforms.functional.resize(pil_img, (20, 20))
 
-        transforms.Resize((20, 20), antialias=True)(tensor_img)
         transforms.RandomResizedCrop((20, 20), antialias=True)(tensor_img)
         transforms.ScaleJitter((20, 20), antialias=True)(tensor_img)
         transforms.RandomShortestSize((20, 20), antialias=True)(tensor_img)
         transforms.RandomResize(10, 20, antialias=True)(tensor_img)
-        transforms.functional.resize(tensor_img, (20, 20), antialias=True)
-        transforms.functional.resize_image_tensor(tensor_img, (20, 20), antialias=True)
-
-        transforms.functional.resize(tensor_video, (20, 20), antialias=True)
-        transforms.functional.resize_video(tensor_video, (20, 20), antialias=True)
-
-        datapoints.Image(tensor_img).resize((20, 20), antialias=True)
         datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
 
-        datapoints.Video(tensor_video).resize((20, 20), antialias=True)
         datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py
index e541feaf1eb..bf297473bc2 100644
--- a/test/test_transforms_v2_consistency.py
+++ b/test/test_transforms_v2_consistency.py
@@ -22,6 +22,7 @@
     make_image,
     make_images,
     make_segmentation_mask,
+    set_rng_seed,
 )
 from torch import nn
 from torchvision import datapoints, transforms as legacy_transforms
@@ -35,6 +36,12 @@
 DEFAULT_MAKE_IMAGES_KWARGS = dict(color_spaces=["RGB"], extra_dims=[(4,)])
 
 
+@pytest.fixture(autouse=True)
+def fix_rng_seed():
+    set_rng_seed(0)
+    yield
+
+
 class NotScriptableArgsKwargs(ArgsKwargs):
     """
     This class is used to mark parameters that render the transform non-scriptable. They still work in eager mode and
@@ -1083,7 +1090,7 @@ def make_label(extra_dims, categories):
     pil_image = to_image_pil(make_image(size=size, color_space="RGB"))
     target = {
-        "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+        "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
         "labels": make_label(extra_dims=(num_objects,), categories=80),
     }
     if with_mask:
@@ -1091,9 +1098,9 @@ def make_label(extra_dims, categories):
 
     yield (pil_image, target)
 
-    tensor_image = torch.Tensor(make_image(size=size, color_space="RGB"))
+    tensor_image = torch.Tensor(make_image(size=size, color_space="RGB", dtype=torch.float32))
     target = {
-        "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+        "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
         "labels": make_label(extra_dims=(num_objects,), categories=80),
     }
     if with_mask:
@@ -1101,9 +1108,9 @@ def make_label(extra_dims, categories):
 
     yield (tensor_image, target)
 
-    datapoint_image = make_image(size=size, color_space="RGB")
+    datapoint_image = make_image(size=size, color_space="RGB", dtype=torch.float32)
     target = {
-        "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+        "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
         "labels": make_label(extra_dims=(num_objects,), categories=80),
     }
     if with_mask:
diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py
index 60a06f571b1..465cc227107 100644
--- a/test/test_transforms_v2_functional.py
+++ b/test/test_transforms_v2_functional.py
@@ -14,7 +14,7 @@ from common_utils import (
     assert_close,
     cache,
-    cpu_and_gpu,
+    cpu_and_cuda,
     DEFAULT_SQUARE_SPATIAL_SIZE,
     make_bounding_boxes,
     needs_cuda,
@@ -120,7 +120,7 @@ class TestKernels:
         [info for info in KERNEL_INFOS if info.logs_usage],
         args_kwargs_fn=lambda info: info.sample_inputs_fn(),
    )
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_logging(self, spy_on, info, args_kwargs, device):
         spy = spy_on(torch._C._log_api_usage_once)
 
@@ -131,7 +131,7 @@ def test_logging(self, spy_on, info, args_kwargs, device):
 
     @ignore_jit_warning_no_profile
     @sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_scripted_vs_eager(self, test_id, info, args_kwargs, device):
         kernel_eager = info.kernel
         kernel_scripted = script(kernel_eager)
@@ -167,7 +167,7 @@ def _unbatch(self, batch, *, data_dims):
         ]
 
     @sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_batched_vs_single(self, test_id, info, args_kwargs, device):
         (batched_input, *other_args), kwargs = args_kwargs.load(device)
 
@@ -208,7 +208,7 @@ def test_batched_vs_single(self, test_id, info, args_kwargs, device):
     )
 
     @sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_no_inplace(self, info, args_kwargs, device):
         (input, *other_args), kwargs = args_kwargs.load(device)
         input = input.as_subclass(torch.Tensor)
@@ -240,7 +240,7 @@ def test_cuda_vs_cpu(self, test_id, info, args_kwargs):
     )
 
     @sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_dtype_and_device_consistency(self, info, args_kwargs, device):
         (input, *other_args), kwargs = args_kwargs.load(device)
         input = input.as_subclass(torch.Tensor)
@@ -320,7 +320,7 @@ class TestDispatchers:
         DISPATCHER_INFOS,
         args_kwargs_fn=lambda info: info.sample_inputs(),
     )
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_logging(self, spy_on, info, args_kwargs, device):
         spy = spy_on(torch._C._log_api_usage_once)
 
@@ -331,7 +331,7 @@ def test_logging(self, spy_on, info, args_kwargs, device):
 
     @ignore_jit_warning_no_profile
     @image_sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_scripted_smoke(self, info, args_kwargs, device):
         dispatcher = script(info.dispatcher)
 
@@ -539,6 +539,7 @@ def test_bounding_box_format_consistency(self, info, args_kwargs):
             (F.to_pil_image, F.to_image_pil),
             (F.elastic_transform, F.elastic),
             (F.convert_image_dtype, F.convert_dtype_image_tensor),
+            (F.to_grayscale, F.rgb_to_grayscale),
         ]
     ],
 )
@@ -553,7 +554,7 @@ def test_alias(alias, target):
         args_kwargs_fn=lambda info: info.sample_inputs_fn(),
     ),
 )
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_convert_dtype_image_tensor_dtype_and_device(info, args_kwargs, device):
     (input, *other_args), kwargs = args_kwargs.load(device)
     dtype = other_args[0] if other_args else kwargs.get("dtype", torch.float32)
@@ -564,7 +565,7 @@ def test_convert_dtype_image_tensor_dtype_and_device(info, args_kwargs, device):
     assert output.device == input.device
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("num_channels", [1, 3])
 def test_normalize_image_tensor_stats(device, num_channels):
     stats = pytest.importorskip("scipy.stats", reason="SciPy is not available")
@@ -664,235 +665,7 @@ def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_):
     return true_matrix
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
-def test_correctness_affine_bounding_box_on_fixed_input(device):
-    # Check transformation against known expected output
-    format = datapoints.BoundingBoxFormat.XYXY
-    spatial_size = (64, 64)
-    in_boxes = [
-        [20, 25, 35, 45],
-        [50, 5, 70, 22],
-        [spatial_size[1] // 2 - 10, spatial_size[0] // 2 - 10, spatial_size[1] // 2 + 10, spatial_size[0] // 2 + 10],
-        [1, 1, 5, 5],
-    ]
-    in_boxes = torch.tensor(in_boxes, dtype=torch.float64, device=device)
-    # Tested parameters
-    angle = 63
-    scale = 0.89
-    dx = 0.12
-    dy = 0.23
-
-    # Expected bboxes computed using albumentations:
-    # from albumentations.augmentations.geometric.functional import bbox_shift_scale_rotate
-    # from albumentations.augmentations.geometric.functional import normalize_bbox, denormalize_bbox
-    # expected_bboxes = []
-    # for in_box in in_boxes:
-    #     n_in_box = normalize_bbox(in_box, *spatial_size)
-    #     n_out_box = bbox_shift_scale_rotate(n_in_box, -angle, scale, dx, dy, *spatial_size)
-    #     out_box = denormalize_bbox(n_out_box, *spatial_size)
-    #     expected_bboxes.append(out_box)
-    expected_bboxes = [
-        (24.522435977922218, 34.375689508290854, 46.443125279998114, 54.3516575015695),
-        (54.88288587110401, 50.08453280875634, 76.44484547743795, 72.81332520036864),
-        (27.709526487041554, 34.74952648704156, 51.650473512958435, 58.69047351295844),
-        (48.56528888843238, 9.611532109828834, 53.35347829361575, 14.39972151501221),
-    ]
-
-    expected_bboxes = clamp_bounding_box(
-        datapoints.BoundingBox(expected_bboxes, format="XYXY", spatial_size=spatial_size)
-    ).tolist()
-
-    output_boxes = F.affine_bounding_box(
-        in_boxes,
-        format=format,
-        spatial_size=spatial_size,
-        angle=angle,
-        translate=(dx * spatial_size[1], dy * spatial_size[0]),
-        scale=scale,
-        shear=(0, 0),
-    )
-
-    torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
-
-
-@pytest.mark.parametrize("device", cpu_and_gpu())
-def test_correctness_affine_segmentation_mask_on_fixed_input(device):
-    # Check transformation against known expected output and CPU/CUDA devices
-
-    # Create a fixed input segmentation mask with 2 square masks
-    # in top-left, bottom-left corners
-    mask = torch.zeros(1, 32, 32, dtype=torch.long, device=device)
-    mask[0, 2:10, 2:10] = 1
-    mask[0, 32 - 9 : 32 - 3, 3:9] = 2
-
-    # Rotate 90 degrees and scale
-    expected_mask = torch.rot90(mask, k=-1, dims=(-2, -1))
-    expected_mask = torch.nn.functional.interpolate(expected_mask[None, :].float(), size=(64, 64), mode="nearest")
-    expected_mask = expected_mask[0, :, 16 : 64 - 16, 16 : 64 - 16].long()
-
-    out_mask = F.affine_mask(mask, 90, [0.0, 0.0], 64.0 / 32.0, [0.0, 0.0])
-
-    torch.testing.assert_close(out_mask, expected_mask)
-
-
-@pytest.mark.parametrize("angle", range(-90, 90, 56))
-@pytest.mark.parametrize("expand, center", [(True, None), (False, None), (False, (12, 14))])
-def test_correctness_rotate_bounding_box(angle, expand, center):
-    def _compute_expected_bbox(bbox, angle_, expand_, center_):
-        affine_matrix = _compute_affine_matrix(angle_, [0.0, 0.0], 1.0, [0.0, 0.0], center_)
-        affine_matrix = affine_matrix[:2, :]
-
-        height, width = bbox.spatial_size
-        bbox_xyxy = convert_format_bounding_box(bbox, new_format=datapoints.BoundingBoxFormat.XYXY)
-        points = np.array(
-            [
-                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
-                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
-                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
-                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
-                # image frame
-                [0.0, 0.0, 1.0],
-                [0.0, height, 1.0],
-                [width, height, 1.0],
-                [width, 0.0, 1.0],
-            ]
-        )
-        transformed_points = np.matmul(points, affine_matrix.T)
-        out_bbox = [
-            float(np.min(transformed_points[:4, 0])),
-            float(np.min(transformed_points[:4, 1])),
-            float(np.max(transformed_points[:4, 0])),
-            float(np.max(transformed_points[:4, 1])),
-        ]
-        if expand_:
-            tr_x = np.min(transformed_points[4:, 0])
-            tr_y = np.min(transformed_points[4:, 1])
-            out_bbox[0] -= tr_x
-            out_bbox[1] -= tr_y
-            out_bbox[2] -= tr_x
-            out_bbox[3] -= tr_y
-
-            height = int(height - 2 * tr_y)
-            width = int(width - 2 * tr_x)
-
-        out_bbox = datapoints.BoundingBox(
-            out_bbox,
-            format=datapoints.BoundingBoxFormat.XYXY,
-            spatial_size=(height, width),
-            dtype=bbox.dtype,
-            device=bbox.device,
-        )
-        out_bbox = clamp_bounding_box(convert_format_bounding_box(out_bbox, new_format=bbox.format))
-        return out_bbox, (height, width)
-
-    spatial_size = (32, 38)
-
-    for bboxes in make_bounding_boxes(spatial_size=spatial_size, extra_dims=((4,),)):
-        bboxes_format = bboxes.format
-        bboxes_spatial_size = bboxes.spatial_size
-
-        output_bboxes, output_spatial_size = F.rotate_bounding_box(
-            bboxes.as_subclass(torch.Tensor),
-            format=bboxes_format,
-            spatial_size=bboxes_spatial_size,
-            angle=angle,
-            expand=expand,
-            center=center,
-        )
-
-        center_ = center
-        if center_ is None:
-            center_ = [s * 0.5 for s in bboxes_spatial_size[::-1]]
-
-        if bboxes.ndim < 2:
-            bboxes = [bboxes]
-
-        expected_bboxes = []
-        for bbox in bboxes:
-            bbox = datapoints.BoundingBox(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size)
-            expected_bbox, expected_spatial_size = _compute_expected_bbox(bbox, -angle, expand, center_)
-            expected_bboxes.append(expected_bbox)
-        if len(expected_bboxes) > 1:
-            expected_bboxes = torch.stack(expected_bboxes)
-        else:
-            expected_bboxes = expected_bboxes[0]
-        torch.testing.assert_close(output_bboxes, expected_bboxes, atol=1, rtol=0)
-        torch.testing.assert_close(output_spatial_size, expected_spatial_size, atol=1, rtol=0)
-
-
-@pytest.mark.parametrize("device", cpu_and_gpu())
-@pytest.mark.parametrize("expand", [False])  # expand=True does not match D2
-def test_correctness_rotate_bounding_box_on_fixed_input(device, expand):
-    # Check transformation against known expected output
-    format = datapoints.BoundingBoxFormat.XYXY
-    spatial_size = (64, 64)
-    # xyxy format
-    in_boxes = [
-        [1, 1, 5, 5],
-        [1, spatial_size[0] - 6, 5, spatial_size[0] - 2],
-        [spatial_size[1] - 6, spatial_size[0] - 6, spatial_size[1] - 2, spatial_size[0] - 2],
-        [spatial_size[1] // 2 - 10, spatial_size[0] // 2 - 10, spatial_size[1] // 2 + 10, spatial_size[0] // 2 + 10],
-    ]
-    in_boxes = torch.tensor(in_boxes, dtype=torch.float64, device=device)
-    # Tested parameters
-    angle = 45
-    center = None if expand else [12, 23]
-
-    # # Expected bboxes computed using Detectron2:
-    # from detectron2.data.transforms import RotationTransform, AugmentationList
-    # from detectron2.data.transforms import AugInput
-    # import cv2
-    # inpt = AugInput(im1, boxes=np.array(in_boxes, dtype="float32"))
-    # augs = AugmentationList([RotationTransform(*size, angle, expand=expand, center=center, interp=cv2.INTER_NEAREST), ])
-    # out = augs(inpt)
-    # print(inpt.boxes)
-    if expand:
-        expected_bboxes = [
-            [1.65937957, 42.67157288, 7.31623382, 48.32842712],
-            [41.96446609, 82.9766594, 47.62132034, 88.63351365],
-            [82.26955262, 42.67157288, 87.92640687, 48.32842712],
-            [31.35786438, 31.35786438, 59.64213562, 59.64213562],
-        ]
-    else:
-        expected_bboxes = [
-            [-11.33452378, 12.39339828, -5.67766953, 18.05025253],
-            [28.97056275, 52.69848481, 34.627417, 58.35533906],
-            [69.27564928, 12.39339828, 74.93250353, 18.05025253],
-            [18.36396103, 1.07968978, 46.64823228, 29.36396103],
-        ]
-    expected_bboxes = clamp_bounding_box(
-        datapoints.BoundingBox(expected_bboxes, format="XYXY", spatial_size=spatial_size)
-    ).tolist()
-
-    output_boxes, _ = F.rotate_bounding_box(
-        in_boxes,
-        format=format,
-        spatial_size=spatial_size,
-        angle=angle,
-        expand=expand,
-        center=center,
-    )
-
-    torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
-
-
-@pytest.mark.parametrize("device", cpu_and_gpu())
-def test_correctness_rotate_segmentation_mask_on_fixed_input(device):
-    # Check transformation against known expected output and CPU/CUDA devices
-
-    # Create a fixed input segmentation mask with 2 square masks
-    # in top-left, bottom-left corners
-    mask = torch.zeros(1, 32, 32, dtype=torch.long, device=device)
-    mask[0, 2:10, 2:10] = 1
-    mask[0, 32 - 9 : 32 - 3, 3:9] = 2
-
-    # Rotate 90 degrees
-    expected_mask = torch.rot90(mask, k=1, dims=(-2, -1))
-    out_mask = F.rotate_mask(mask, 90, expand=False)
-    torch.testing.assert_close(out_mask, expected_mask)
-
-
-@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "format", [datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH, datapoints.BoundingBoxFormat.CXCYWH], @@ -949,19 +722,7 @@ def test_correctness_crop_bounding_box(device, format, top, left, height, width, torch.testing.assert_close(output_spatial_size, spatial_size) -@pytest.mark.parametrize("device", cpu_and_gpu()) -def test_correctness_horizontal_flip_segmentation_mask_on_fixed_input(device): - mask = torch.zeros((3, 3, 3), dtype=torch.long, device=device) - mask[:, :, 0] = 1 - - out_mask = F.horizontal_flip_mask(mask) - - expected_mask = torch.zeros((3, 3, 3), dtype=torch.long, device=device) - expected_mask[:, :, -1] = 1 - torch.testing.assert_close(out_mask, expected_mask) - - -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device): mask = torch.zeros((3, 3, 3), dtype=torch.long, device=device) mask[:, 0, :] = 1 @@ -973,7 +734,7 @@ def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device): torch.testing.assert_close(out_mask, expected_mask) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "format", [datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH, datapoints.BoundingBoxFormat.CXCYWH], @@ -1032,7 +793,7 @@ def _parse_padding(padding): return padding -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("padding", [[1], [1, 1], [1, 1, 2, 2]]) def test_correctness_pad_bounding_box(device, padding): def _compute_expected_bbox(bbox, padding_): @@ -1087,7 +848,7 @@ def _compute_expected_spatial_size(bbox, padding_): torch.testing.assert_close(output_boxes, expected_bboxes, atol=1, rtol=0) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_correctness_pad_segmentation_mask_on_fixed_input(device): mask = torch.ones((1, 3, 3), dtype=torch.long, device=device) @@ -1098,7 +859,7 @@ def test_correctness_pad_segmentation_mask_on_fixed_input(device): torch.testing.assert_close(out_mask, expected_mask) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "startpoints, endpoints", [ @@ -1182,7 +943,7 @@ def _compute_expected_bbox(bbox, pcoeffs_): torch.testing.assert_close(output_bboxes, expected_bboxes, rtol=0, atol=1) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "output_size", [(18, 18), [18, 15], (16, 19), [12], [46, 48]], @@ -1236,7 +997,7 @@ def _compute_expected_bbox(bbox, output_size_): torch.testing.assert_close(output_spatial_size, output_size) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("output_size", [[4, 2], [4], [7, 6]]) def test_correctness_center_crop_mask(device, output_size): def _compute_expected_mask(mask, output_size): @@ -1260,7 +1021,7 @@ def _compute_expected_mask(mask, output_size): # Copied from test/test_functional_tensor.py -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("spatial_size", ("small", "large")) @pytest.mark.parametrize("dt", [None, 
torch.float32, torch.float64, torch.float16]) @pytest.mark.parametrize("ksize", [(3, 3), [3, 5], (23, 23)]) @@ -1357,7 +1118,7 @@ def test_equalize_image_tensor_edge_cases(): assert output.unique().tolist() == [0, 255] -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_correctness_uniform_temporal_subsample(device): video = torch.arange(10, device=device)[:, None, None, None].expand(-1, 3, 8, 8) out_video = F.uniform_temporal_subsample(video, 5) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py new file mode 100644 index 00000000000..69180b99dbc --- /dev/null +++ b/test/test_transforms_v2_refactored.py @@ -0,0 +1,1636 @@ +import contextlib +import inspect +import math +import re +from typing import get_type_hints +from unittest import mock + +import numpy as np +import PIL.Image +import pytest + +import torch +import torchvision.transforms.v2 as transforms +from common_utils import ( + assert_equal, + assert_no_warnings, + cache, + cpu_and_cuda, + ignore_jit_no_profile_information_warning, + make_bounding_box, + make_detection_mask, + make_image, + make_image_pil, + make_image_tensor, + make_segmentation_mask, + make_video, + set_rng_seed, +) +from torch.testing import assert_close +from torchvision import datapoints + +from torchvision.transforms._functional_tensor import _max_value as get_max_value +from torchvision.transforms.functional import pil_modes_mapping +from torchvision.transforms.v2 import functional as F + + +@pytest.fixture(autouse=True) +def fix_rng_seed(): + set_rng_seed(0) + yield + + +def _to_tolerances(maybe_tolerance_dict): + if not isinstance(maybe_tolerance_dict, dict): + return dict(rtol=None, atol=None) + + tolerances = dict(rtol=0, atol=0) + tolerances.update(maybe_tolerance_dict) + return tolerances + + +def _check_kernel_cuda_vs_cpu(kernel, input, *args, rtol, atol, **kwargs): + """Checks if the kernel produces closes results for inputs on GPU and CPU.""" + if input.device.type != "cuda": + return + + input_cuda = input.as_subclass(torch.Tensor) + input_cpu = input_cuda.to("cpu") + + actual = kernel(input_cuda, *args, **kwargs) + expected = kernel(input_cpu, *args, **kwargs) + + assert_close(actual, expected, check_device=False, rtol=rtol, atol=atol) + + +@cache +def _script(fn): + try: + return torch.jit.script(fn) + except Exception as error: + raise AssertionError(f"Trying to `torch.jit.script` '{fn.__name__}' raised the error above.") from error + + +def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs): + """Checks if the kernel is scriptable and if the scripted output is close to the eager one.""" + if input.device.type != "cpu": + return + + kernel_scripted = _script(kernel) + + input = input.as_subclass(torch.Tensor) + with ignore_jit_no_profile_information_warning(): + actual = kernel_scripted(input, *args, **kwargs) + expected = kernel(input, *args, **kwargs) + + assert_close(actual, expected, rtol=rtol, atol=atol) + + +def _check_kernel_batched_vs_unbatched(kernel, input, *args, rtol, atol, **kwargs): + """Checks if the kernel produces close results for batched and unbatched inputs.""" + unbatched_input = input.as_subclass(torch.Tensor) + + for batch_dims in [(2,), (2, 1)]: + repeats = [*batch_dims, *[1] * input.ndim] + + actual = kernel(unbatched_input.repeat(repeats), *args, **kwargs) + + expected = kernel(unbatched_input, *args, **kwargs) + # We can't directly call `.repeat()` on the output, since some kernel also return 
some additional metadata + if isinstance(expected, torch.Tensor): + expected = expected.repeat(repeats) + else: + tensor, *metadata = expected + expected = (tensor.repeat(repeats), *metadata) + + assert_close(actual, expected, rtol=rtol, atol=atol) + + for degenerate_batch_dims in [(0,), (5, 0), (0, 5)]: + degenerate_batched_input = torch.empty( + degenerate_batch_dims + input.shape, dtype=input.dtype, device=input.device + ) + + output = kernel(degenerate_batched_input, *args, **kwargs) + # Most kernels just return a tensor, but some also return some additional metadata + if not isinstance(output, torch.Tensor): + output, *_ = output + + assert output.shape[: -input.ndim] == degenerate_batch_dims + + +def check_kernel( + kernel, + input, + *args, + check_cuda_vs_cpu=True, + check_scripted_vs_eager=True, + check_batched_vs_unbatched=True, + **kwargs, +): + initial_input_version = input._version + + output = kernel(input.as_subclass(torch.Tensor), *args, **kwargs) + # Most kernels just return a tensor, but some also return some additional metadata + if not isinstance(output, torch.Tensor): + output, *_ = output + + # check that no inplace operation happened + assert input._version == initial_input_version + + assert output.dtype == input.dtype + assert output.device == input.device + + if check_cuda_vs_cpu: + _check_kernel_cuda_vs_cpu(kernel, input, *args, **kwargs, **_to_tolerances(check_cuda_vs_cpu)) + + if check_scripted_vs_eager: + _check_kernel_scripted_vs_eager(kernel, input, *args, **kwargs, **_to_tolerances(check_scripted_vs_eager)) + + if check_batched_vs_unbatched: + _check_kernel_batched_vs_unbatched(kernel, input, *args, **kwargs, **_to_tolerances(check_batched_vs_unbatched)) + + +def _check_dispatcher_scripted_smoke(dispatcher, input, *args, **kwargs): + """Checks if the dispatcher can be scripted and the scripted version can be called without error.""" + if not isinstance(input, datapoints.Image): + return + + dispatcher_scripted = _script(dispatcher) + with ignore_jit_no_profile_information_warning(): + dispatcher_scripted(input.as_subclass(torch.Tensor), *args, **kwargs) + + +def _check_dispatcher_dispatch(dispatcher, kernel, input, *args, **kwargs): + """Checks if the dispatcher correctly dispatches the input to the corresponding kernel and that the input type is + preserved in doing so. For bounding boxes also checks that the format is preserved. + """ + if isinstance(input, datapoints._datapoint.Datapoint): + # Due to our complex dispatch architecture for datapoints, we cannot spy on the kernel directly, + # but rather have to patch the `Datapoint.__F` attribute to contain the spied on kernel. + spy = mock.MagicMock(wraps=kernel, name=kernel.__name__) + with mock.patch.object(F, kernel.__name__, spy): + # Due to Python's name mangling, the `Datapoint.__F` attribute is only accessible from inside the class. 
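+            # A minimal illustration of the mangling (hypothetical `Widget` class, not part of this test):
+            #
+            #     class Widget:
+            #         __F = "functional"  # compiled to the attribute name `_Widget__F`
+            #
+            #     Widget._Widget__F  # -> "functional"
+            #     Widget.__F         # -> AttributeError, `__F` is not mangled outside the class body
+            #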
+ # Since that is not the case here, we need to prefix f"_{cls.__name__}" + # See https://docs.python.org/3/tutorial/classes.html#private-variables for details + with mock.patch.object(datapoints._datapoint.Datapoint, "_Datapoint__F", new=F): + output = dispatcher(input, *args, **kwargs) + + spy.assert_called_once() + else: + with mock.patch(f"{dispatcher.__module__}.{kernel.__name__}", wraps=kernel) as spy: + output = dispatcher(input, *args, **kwargs) + + spy.assert_called_once() + + assert isinstance(output, type(input)) + + if isinstance(input, datapoints.BoundingBox): + assert output.format == input.format + + +def check_dispatcher( + dispatcher, + kernel, + input, + *args, + check_scripted_smoke=True, + check_dispatch=True, + **kwargs, +): + with mock.patch("torch._C._log_api_usage_once", wraps=torch._C._log_api_usage_once) as spy: + dispatcher(input, *args, **kwargs) + + spy.assert_any_call(f"{dispatcher.__module__}.{dispatcher.__name__}") + + unknown_input = object() + with pytest.raises(TypeError, match=re.escape(str(type(unknown_input)))): + dispatcher(unknown_input, *args, **kwargs) + + if check_scripted_smoke: + _check_dispatcher_scripted_smoke(dispatcher, input, *args, **kwargs) + + if check_dispatch: + _check_dispatcher_dispatch(dispatcher, kernel, input, *args, **kwargs) + + +def _check_dispatcher_kernel_signature_match(dispatcher, *, kernel, input_type): + """Checks if the signature of the dispatcher matches the kernel signature.""" + dispatcher_signature = inspect.signature(dispatcher) + dispatcher_params = list(dispatcher_signature.parameters.values())[1:] + + kernel_signature = inspect.signature(kernel) + kernel_params = list(kernel_signature.parameters.values())[1:] + + if issubclass(input_type, datapoints._datapoint.Datapoint): + # We filter out metadata that is implicitly passed to the dispatcher through the input datapoint, but has to be + # explicitly passed to the kernel. + kernel_params = [param for param in kernel_params if param.name not in input_type.__annotations__.keys()] + + dispatcher_params = iter(dispatcher_params) + for dispatcher_param, kernel_param in zip(dispatcher_params, kernel_params): + try: + # In general, the dispatcher parameters are a superset of the kernel parameters. Thus, we filter out + # dispatcher parameters that have no kernel equivalent while keeping the order intact. + while dispatcher_param.name != kernel_param.name: + dispatcher_param = next(dispatcher_params) + except StopIteration: + raise AssertionError( + f"Parameter `{kernel_param.name}` of kernel `{kernel.__name__}` " + f"has no corresponding parameter on the dispatcher `{dispatcher.__name__}`." + ) from None + + if issubclass(input_type, PIL.Image.Image): + # PIL kernels often have more correct annotations, since they are not limited by JIT. Thus, we don't check + # them in the first place. 
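+            # As an illustration (hypothetical parameters, not part of this test):
+            # `inspect.Parameter` equality compares name, kind, default, *and* annotation,
+            # so clearing the annotations below is what makes the comparison ignore them:
+            #
+            #     kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
+            #     p1 = inspect.Parameter("size", kind, annotation=int)
+            #     p2 = inspect.Parameter("size", kind)
+            #     p1 == p2                                               # False
+            #     p1.replace(annotation=inspect.Parameter.empty) == p2   # True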
+ dispatcher_param._annotation = kernel_param._annotation = inspect.Parameter.empty + + assert dispatcher_param == kernel_param + + +def _check_dispatcher_datapoint_signature_match(dispatcher): + """Checks if the signature of the dispatcher matches the corresponding method signature on the Datapoint class.""" + dispatcher_signature = inspect.signature(dispatcher) + dispatcher_params = list(dispatcher_signature.parameters.values())[1:] + + datapoint_method = getattr(datapoints._datapoint.Datapoint, dispatcher.__name__) + datapoint_signature = inspect.signature(datapoint_method) + datapoint_params = list(datapoint_signature.parameters.values())[1:] + + # Some annotations in the `datapoints._datapoint` module + # are stored as strings. The block below makes them concrete again (non-strings), so they can be compared to the + # natively concrete dispatcher annotations. + datapoint_annotations = get_type_hints(datapoint_method) + for param in datapoint_params: + param._annotation = datapoint_annotations[param.name] + + assert dispatcher_params == datapoint_params + + +def check_dispatcher_signatures_match(dispatcher, *, kernel, input_type): + _check_dispatcher_kernel_signature_match(dispatcher, kernel=kernel, input_type=input_type) + _check_dispatcher_datapoint_signature_match(dispatcher) + + +def _check_transform_v1_compatibility(transform, input): + """If the transform defines the ``_v1_transform_cls`` attribute, checks if the transform has a public, static + ``get_params`` method, is scriptable, and the scripted version can be called without error.""" + if not hasattr(transform, "_v1_transform_cls"): + return + + if type(input) is not torch.Tensor: + return + + if hasattr(transform._v1_transform_cls, "get_params"): + assert type(transform).get_params is transform._v1_transform_cls.get_params + + scripted_transform = _script(transform) + with ignore_jit_no_profile_information_warning(): + scripted_transform(input) + + +def check_transform(transform_cls, input, *args, **kwargs): + transform = transform_cls(*args, **kwargs) + + output = transform(input) + assert isinstance(output, type(input)) + + if isinstance(input, datapoints.BoundingBox): + assert output.format == input.format + + _check_transform_v1_compatibility(transform, input) + + +def transform_cls_to_functional(transform_cls, **transform_specific_kwargs): + def wrapper(input, *args, **kwargs): + transform = transform_cls(*args, **transform_specific_kwargs, **kwargs) + return transform(input) + + wrapper.__name__ = transform_cls.__name__ + + return wrapper + + +def param_value_parametrization(**kwargs): + """Helper function to turn + + @pytest.mark.parametrize( + ("param", "value"), + ("a", 1), + ("a", 2), + ("a", 3), + ("b", -1.0) + ("b", 1.0) + ) + + into + + @param_value_parametrization(a=[1, 2, 3], b=[-1.0, 1.0]) + """ + return pytest.mark.parametrize( + ("param", "value"), + [(param, value) for param, values in kwargs.items() for value in values], + ) + + +def adapt_fill(value, *, dtype): + """Adapt fill values in the range [0.0, 1.0] to the value range of the dtype""" + if value is None: + return value + + max_value = get_max_value(dtype) + + if isinstance(value, (int, float)): + return type(value)(value * max_value) + elif isinstance(value, (list, tuple)): + return type(value)(type(v)(v * max_value) for v in value) + else: + raise ValueError(f"fill should be an int or float, or a list or tuple of the former, but got '{value}'.") + + +EXHAUSTIVE_TYPE_FILLS = [ + None, + 1, + 0.5, + [1], + [0.2], + (0,), + (0.7,), + [1, 0, 1], + 
[0.1, 0.2, 0.3], + (0, 1, 0), + (0.9, 0.234, 0.314), +] +CORRECTNESS_FILLS = [ + v for v in EXHAUSTIVE_TYPE_FILLS if v is None or isinstance(v, float) or (isinstance(v, list) and len(v) > 1) +] + + +# We cannot use `list(transforms.InterpolationMode)` here, since it includes some PIL-only ones as well +INTERPOLATION_MODES = [ + transforms.InterpolationMode.NEAREST, + transforms.InterpolationMode.NEAREST_EXACT, + transforms.InterpolationMode.BILINEAR, + transforms.InterpolationMode.BICUBIC, +] + + +@contextlib.contextmanager +def assert_warns_antialias_default_value(): + with pytest.warns(UserWarning, match="The default value of the antialias parameter of all the resizing transforms"): + yield + + +def reference_affine_bounding_box_helper(bounding_box, *, format, spatial_size, affine_matrix): + def transform(bbox): + # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1 + in_dtype = bbox.dtype + if not torch.is_floating_point(bbox): + bbox = bbox.float() + bbox_xyxy = F.convert_format_bounding_box( + bbox.as_subclass(torch.Tensor), + old_format=format, + new_format=datapoints.BoundingBoxFormat.XYXY, + inplace=True, + ) + points = np.array( + [ + [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0], + [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0], + [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0], + [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0], + ] + ) + transformed_points = np.matmul(points, affine_matrix.T) + out_bbox = torch.tensor( + [ + np.min(transformed_points[:, 0]).item(), + np.min(transformed_points[:, 1]).item(), + np.max(transformed_points[:, 0]).item(), + np.max(transformed_points[:, 1]).item(), + ], + dtype=bbox_xyxy.dtype, + ) + out_bbox = F.convert_format_bounding_box( + out_bbox, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format, inplace=True + ) + # It is important to clamp before casting, especially for CXCYWH format, dtype=int64 + out_bbox = F.clamp_bounding_box(out_bbox, format=format, spatial_size=spatial_size) + out_bbox = out_bbox.to(dtype=in_dtype) + return out_bbox + + return torch.stack([transform(b) for b in bounding_box.reshape(-1, 4).unbind()]).reshape(bounding_box.shape) + + +class TestResize: + INPUT_SIZE = (17, 11) + OUTPUT_SIZES = [17, [17], (17,), [12, 13], (12, 13)] + + def _make_max_size_kwarg(self, *, use_max_size, size): + if use_max_size: + if not (isinstance(size, int) or len(size) == 1): + # This would result in an `ValueError` + return None + + max_size = (size if isinstance(size, int) else size[0]) + 1 + else: + max_size = None + + return dict(max_size=max_size) + + def _compute_output_size(self, *, input_size, size, max_size): + if not (isinstance(size, int) or len(size) == 1): + return tuple(size) + + if not isinstance(size, int): + size = size[0] + + old_height, old_width = input_size + ratio = old_width / old_height + if ratio > 1: + new_height = size + new_width = int(ratio * new_height) + else: + new_width = size + new_height = int(new_width / ratio) + + if max_size is not None and max(new_height, new_width) > max_size: + # Need to recompute the aspect ratio, since it might have changed due to rounding + ratio = new_width / new_height + if ratio > 1: + new_width = max_size + new_height = int(new_width / ratio) + else: + new_height = max_size + new_width = int(new_height * ratio) + + return new_height, new_width + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) + @pytest.mark.parametrize("use_max_size", [True, False]) + 
@pytest.mark.parametrize("antialias", [True, False])
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_image_tensor(self, size, interpolation, use_max_size, antialias, dtype, device):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return
+
+        # In contrast to CPU, there is no native `InterpolationMode.BICUBIC` implementation for uint8 images on CUDA.
+        # Internally, it uses the float path. Thus, we need to test with an enormous tolerance here to account for that.
+        atol = 30 if interpolation is transforms.InterpolationMode.BICUBIC and dtype is torch.uint8 else 1
+        check_cuda_vs_cpu_tolerances = dict(rtol=0, atol=atol / 255 if dtype.is_floating_point else atol)
+
+        check_kernel(
+            F.resize_image_tensor,
+            make_image(self.INPUT_SIZE, dtype=dtype, device=device),
+            size=size,
+            interpolation=interpolation,
+            **max_size_kwarg,
+            antialias=antialias,
+            check_cuda_vs_cpu=check_cuda_vs_cpu_tolerances,
+            check_scripted_vs_eager=not isinstance(size, int),
+        )
+
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("use_max_size", [True, False])
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_bounding_box(self, format, size, use_max_size, dtype, device):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return
+
+        bounding_box = make_bounding_box(
+            format=format,
+            spatial_size=self.INPUT_SIZE,
+            dtype=dtype,
+            device=device,
+        )
+        check_kernel(
+            F.resize_bounding_box,
+            bounding_box,
+            spatial_size=bounding_box.spatial_size,
+            size=size,
+            **max_size_kwarg,
+            check_scripted_vs_eager=not isinstance(size, int),
+        )
+
+    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask])
+    def test_kernel_mask(self, make_mask):
+        check_kernel(F.resize_mask, make_mask(self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1])
+
+    def test_kernel_video(self):
+        check_kernel(F.resize_video, make_video(self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1], antialias=True)
+
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize(
+        ("kernel", "make_input"),
+        [
+            (F.resize_image_tensor, make_image_tensor),
+            (F.resize_image_pil, make_image_pil),
+            (F.resize_image_tensor, make_image),
+            (F.resize_bounding_box, make_bounding_box),
+            (F.resize_mask, make_segmentation_mask),
+            (F.resize_video, make_video),
+        ],
+    )
+    def test_dispatcher(self, size, kernel, make_input):
+        check_dispatcher(
+            F.resize,
+            kernel,
+            make_input(self.INPUT_SIZE),
+            size=size,
+            antialias=True,
+            check_scripted_smoke=not isinstance(size, int),
+        )
+
+    @pytest.mark.parametrize(
+        ("kernel", "input_type"),
+        [
+            (F.resize_image_tensor, torch.Tensor),
+            (F.resize_image_pil, PIL.Image.Image),
+            (F.resize_image_tensor, datapoints.Image),
+            (F.resize_bounding_box, datapoints.BoundingBox),
+            (F.resize_mask, datapoints.Mask),
+            (F.resize_video, datapoints.Video),
+        ],
+    )
+    def test_dispatcher_signature(self, kernel, input_type):
+        check_dispatcher_signatures_match(F.resize, kernel=kernel, input_type=input_type)
+
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    @pytest.mark.parametrize(
+        "make_input",
+        [
+            make_image_tensor,
+            make_image_pil,
+            make_image,
+            make_bounding_box,
+            make_segmentation_mask,
+            make_detection_mask,
+
make_video, + ], + ) + def test_transform(self, size, device, make_input): + check_transform(transforms.Resize, make_input(self.INPUT_SIZE, device=device), size=size, antialias=True) + + def _check_output_size(self, input, output, *, size, max_size): + assert tuple(F.get_spatial_size(output)) == self._compute_output_size( + input_size=F.get_spatial_size(input), size=size, max_size=max_size + ) + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + # `InterpolationMode.NEAREST` is modeled after the buggy `INTER_NEAREST` interpolation of CV2. + # The PIL equivalent of `InterpolationMode.NEAREST` is `InterpolationMode.NEAREST_EXACT` + @pytest.mark.parametrize("interpolation", set(INTERPOLATION_MODES) - {transforms.InterpolationMode.NEAREST}) + @pytest.mark.parametrize("use_max_size", [True, False]) + @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)]) + def test_image_correctness(self, size, interpolation, use_max_size, fn): + if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): + return + + image = make_image(self.INPUT_SIZE, dtype=torch.uint8) + + actual = fn(image, size=size, interpolation=interpolation, **max_size_kwarg, antialias=True) + expected = F.to_image_tensor( + F.resize(F.to_image_pil(image), size=size, interpolation=interpolation, **max_size_kwarg) + ) + + self._check_output_size(image, actual, size=size, **max_size_kwarg) + torch.testing.assert_close(actual, expected, atol=1, rtol=0) + + def _reference_resize_bounding_box(self, bounding_box, *, size, max_size=None): + old_height, old_width = bounding_box.spatial_size + new_height, new_width = self._compute_output_size( + input_size=bounding_box.spatial_size, size=size, max_size=max_size + ) + + if (old_height, old_width) == (new_height, new_width): + return bounding_box + + affine_matrix = np.array( + [ + [new_width / old_width, 0, 0], + [0, new_height / old_height, 0], + ], + dtype="float64" if bounding_box.dtype == torch.float64 else "float32", + ) + + expected_bboxes = reference_affine_bounding_box_helper( + bounding_box, + format=bounding_box.format, + spatial_size=(new_height, new_width), + affine_matrix=affine_matrix, + ) + return datapoints.BoundingBox.wrap_like(bounding_box, expected_bboxes, spatial_size=(new_height, new_width)) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize("use_max_size", [True, False]) + @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)]) + def test_bounding_box_correctness(self, format, size, use_max_size, fn): + if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): + return + + bounding_box = make_bounding_box(format=format, spatial_size=self.INPUT_SIZE) + + actual = fn(bounding_box, size=size, **max_size_kwarg) + expected = self._reference_resize_bounding_box(bounding_box, size=size, **max_size_kwarg) + + self._check_output_size(bounding_box, actual, size=size, **max_size_kwarg) + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize("interpolation", set(transforms.InterpolationMode) - set(INTERPOLATION_MODES)) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + def test_pil_interpolation_compat_smoke(self, interpolation, make_input): + input = make_input(self.INPUT_SIZE) + + with ( + contextlib.nullcontext() + if isinstance(input, PIL.Image.Image) + # This error is 
triggered in PyTorch core + else pytest.raises(NotImplementedError, match=f"got {interpolation.value.lower()}") + ): + F.resize( + input, + size=self.OUTPUT_SIZES[0], + interpolation=interpolation, + ) + + def test_dispatcher_pil_antialias_warning(self): + with pytest.warns(UserWarning, match="Anti-alias option is always applied for PIL Image input"): + F.resize(make_image_pil(self.INPUT_SIZE), size=self.OUTPUT_SIZES[0], antialias=False) + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], + ) + def test_max_size_error(self, size, make_input): + if isinstance(size, int) or len(size) == 1: + max_size = (size if isinstance(size, int) else size[0]) - 1 + match = "must be strictly greater than the requested size" + else: + # value can be anything other than None + max_size = -1 + match = "size should be an int or a sequence of length 1" + + with pytest.raises(ValueError, match=match): + F.resize(make_input(self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) + + @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image, make_video], + ) + def test_antialias_warning(self, interpolation, make_input): + with ( + assert_warns_antialias_default_value() + if interpolation in {transforms.InterpolationMode.BILINEAR, transforms.InterpolationMode.BICUBIC} + else assert_no_warnings() + ): + F.resize( + make_input(self.INPUT_SIZE), + size=self.OUTPUT_SIZES[0], + interpolation=interpolation, + ) + + @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + def test_interpolation_int(self, interpolation, make_input): + input = make_input(self.INPUT_SIZE) + + # `InterpolationMode.NEAREST_EXACT` has no proper corresponding integer equivalent. Internally, we map it to + # `0` to be the same as `InterpolationMode.NEAREST` for PIL. However, for the tensor backend there is a + # difference and thus we don't test it here. + if isinstance(input, torch.Tensor) and interpolation is transforms.InterpolationMode.NEAREST_EXACT: + return + + expected = F.resize(input, size=self.OUTPUT_SIZES[0], interpolation=interpolation, antialias=True) + actual = F.resize( + input, size=self.OUTPUT_SIZES[0], interpolation=pil_modes_mapping[interpolation], antialias=True + ) + + assert_equal(actual, expected) + + def test_transform_unknown_size_error(self): + with pytest.raises(ValueError, match="size can either be an integer or a list or tuple of one or two integers"): + transforms.Resize(size=object()) + + @pytest.mark.parametrize( + "size", [min(INPUT_SIZE), [min(INPUT_SIZE)], (min(INPUT_SIZE),), list(INPUT_SIZE), tuple(INPUT_SIZE)] + ) + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], + ) + def test_noop(self, size, make_input): + input = make_input(self.INPUT_SIZE) + + output = F.resize(input, size=F.get_spatial_size(input), antialias=True) + + # This identity check is not a requirement. It is here to avoid breaking the behavior by accident. If there + # is a good reason to break this, feel free to downgrade to an equality check. 
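+        # As an illustration (standalone sketch, not executed here): two tensors can be
+        # distinct Python objects while sharing one storage, which is exactly what the
+        # `data_ptr()` comparison below detects:
+        #
+        #     t = torch.rand(3)
+        #     v = t.view(3)
+        #     v is t                          # False, different Python objects
+        #     v.data_ptr() == t.data_ptr()    # True, same underlying memory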
+ if isinstance(input, datapoints._datapoint.Datapoint): + # We can't test identity directly, since that checks for the identity of the Python object. Since all + # datapoints unwrap before a kernel and wrap again afterwards, the Python object changes. Thus, we check + # that the underlying storage is the same + assert output.data_ptr() == input.data_ptr() + else: + assert output is input + + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], + ) + def test_no_regression_5405(self, make_input): + # Checks that `max_size` is not ignored if `size == small_edge_size` + # See https://github.com/pytorch/vision/issues/5405 + + input = make_input(self.INPUT_SIZE) + + size = min(F.get_spatial_size(input)) + max_size = size + 1 + output = F.resize(input, size=size, max_size=max_size, antialias=True) + + assert max(F.get_spatial_size(output)) == max_size + + +class TestHorizontalFlip: + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_tensor(self, dtype, device): + check_kernel(F.horizontal_flip_image_tensor, make_image(dtype=dtype, device=device)) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_box(self, format, dtype, device): + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) + check_kernel( + F.horizontal_flip_bounding_box, + bounding_box, + format=format, + spatial_size=bounding_box.spatial_size, + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.horizontal_flip_mask, make_mask()) + + def test_kernel_video(self): + check_kernel(F.horizontal_flip_video, make_video()) + + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.horizontal_flip_image_tensor, make_image_tensor), + (F.horizontal_flip_image_pil, make_image_pil), + (F.horizontal_flip_image_tensor, make_image), + (F.horizontal_flip_bounding_box, make_bounding_box), + (F.horizontal_flip_mask, make_segmentation_mask), + (F.horizontal_flip_video, make_video), + ], + ) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.horizontal_flip, kernel, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.horizontal_flip_image_tensor, torch.Tensor), + (F.horizontal_flip_image_pil, PIL.Image.Image), + (F.horizontal_flip_image_tensor, datapoints.Image), + (F.horizontal_flip_bounding_box, datapoints.BoundingBox), + (F.horizontal_flip_mask, datapoints.Mask), + (F.horizontal_flip_video, datapoints.Video), + ], + ) + def test_dispatcher_signature(self, kernel, input_type): + check_dispatcher_signatures_match(F.horizontal_flip, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + check_transform(transforms.RandomHorizontalFlip, make_input(device=device), p=1) + + @pytest.mark.parametrize( + "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] + ) + def test_image_correctness(self, fn): + image = 
make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image) + expected = F.to_image_tensor(F.horizontal_flip(F.to_image_pil(image))) + + torch.testing.assert_close(actual, expected) + + def _reference_horizontal_flip_bounding_box(self, bounding_box): + affine_matrix = np.array( + [ + [-1, 0, bounding_box.spatial_size[1]], + [0, 1, 0], + ], + dtype="float64" if bounding_box.dtype == torch.float64 else "float32", + ) + + expected_bboxes = reference_affine_bounding_box_helper( + bounding_box, + format=bounding_box.format, + spatial_size=bounding_box.spatial_size, + affine_matrix=affine_matrix, + ) + + return datapoints.BoundingBox.wrap_like(bounding_box, expected_bboxes) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize( + "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] + ) + def test_bounding_box_correctness(self, format, fn): + bounding_box = make_bounding_box(format=format) + + actual = fn(bounding_box) + expected = self._reference_horizontal_flip_bounding_box(bounding_box) + + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform_noop(self, make_input, device): + input = make_input(device=device) + + transform = transforms.RandomHorizontalFlip(p=0) + + output = transform(input) + + assert_equal(output, input) + + +class TestAffine: + _EXHAUSTIVE_TYPE_AFFINE_KWARGS = dict( + # float, int + angle=[-10.9, 18], + # two-list of float, two-list of int, two-tuple of float, two-tuple of int + translate=[[6.3, -0.6], [1, -3], (16.6, -6.6), (-2, 4)], + # float + scale=[0.5], + # float, int, + # one-list of float, one-list of int, one-tuple of float, one-tuple of int + # two-list of float, two-list of int, two-tuple of float, two-tuple of int + shear=[35.6, 38, [-37.7], [-23], (5.3,), (-52,), [5.4, 21.8], [-47, 51], (-11.2, 36.7), (8, -53)], + # None + # two-list of float, two-list of int, two-tuple of float, two-tuple of int + center=[None, [1.2, 4.9], [-3, 1], (2.5, -4.7), (3, 2)], + ) + # The special case for shear makes sure we pick a value that is supported while JIT scripting + _MINIMAL_AFFINE_KWARGS = { + k: vs[0] if k != "shear" else next(v for v in vs if isinstance(v, list)) + for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items() + } + _CORRECTNESS_AFFINE_KWARGS = { + k: [v for v in vs if v is None or isinstance(v, float) or (isinstance(v, list) and len(v) > 1)] + for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items() + } + + _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES = dict( + degrees=[30, (-15, 20)], + translate=[None, (0.5, 0.5)], + scale=[None, (0.75, 1.25)], + shear=[None, (12, 30, -17, 5), 10, (-5, 12)], + ) + _CORRECTNESS_TRANSFORM_AFFINE_RANGES = { + k: next(v for v in vs if v is not None) for k, vs in _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES.items() + } + + def _check_kernel(self, kernel, input, *args, **kwargs): + kwargs_ = self._MINIMAL_AFFINE_KWARGS.copy() + kwargs_.update(kwargs) + check_kernel(kernel, input, *args, **kwargs_) + + @param_value_parametrization( + angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"], + translate=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["translate"], + shear=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["shear"], + center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], + interpolation=[transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR], + 
fill=EXHAUSTIVE_TYPE_FILLS, + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_tensor(self, param, value, dtype, device): + if param == "fill": + value = adapt_fill(value, dtype=dtype) + self._check_kernel( + F.affine_image_tensor, + make_image(dtype=dtype, device=device), + **{param: value}, + check_scripted_vs_eager=not (param in {"shear", "fill"} and isinstance(value, (int, float))), + check_cuda_vs_cpu=dict(atol=1, rtol=0) + if dtype is torch.uint8 and param == "interpolation" and value is transforms.InterpolationMode.BILINEAR + else True, + ) + + @param_value_parametrization( + angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"], + translate=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["translate"], + shear=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["shear"], + center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], + ) + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_box(self, param, value, format, dtype, device): + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) + self._check_kernel( + F.affine_bounding_box, + bounding_box, + format=format, + spatial_size=bounding_box.spatial_size, + **{param: value}, + check_scripted_vs_eager=not (param == "shear" and isinstance(value, (int, float))), + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + self._check_kernel(F.affine_mask, make_mask()) + + def test_kernel_video(self): + self._check_kernel(F.affine_video, make_video()) + + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.affine_image_tensor, make_image_tensor), + (F.affine_image_pil, make_image_pil), + (F.affine_image_tensor, make_image), + (F.affine_bounding_box, make_bounding_box), + (F.affine_mask, make_segmentation_mask), + (F.affine_video, make_video), + ], + ) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.affine, kernel, make_input(), **self._MINIMAL_AFFINE_KWARGS) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.affine_image_tensor, torch.Tensor), + (F.affine_image_pil, PIL.Image.Image), + (F.affine_image_tensor, datapoints.Image), + (F.affine_bounding_box, datapoints.BoundingBox), + (F.affine_mask, datapoints.Mask), + (F.affine_video, datapoints.Video), + ], + ) + def test_dispatcher_signature(self, kernel, input_type): + check_dispatcher_signatures_match(F.affine, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + input = make_input(device=device) + + check_transform(transforms.RandomAffine, input, **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES) + + @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"]) + @pytest.mark.parametrize("translate", _CORRECTNESS_AFFINE_KWARGS["translate"]) + @pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"]) + @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"]) + @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + @pytest.mark.parametrize( + "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR] + ) + 
@pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
+    def test_functional_image_correctness(self, angle, translate, scale, shear, center, interpolation, fill):
+        image = make_image(dtype=torch.uint8, device="cpu")
+
+        fill = adapt_fill(fill, dtype=torch.uint8)
+
+        actual = F.affine(
+            image,
+            angle=angle,
+            translate=translate,
+            scale=scale,
+            shear=shear,
+            center=center,
+            interpolation=interpolation,
+            fill=fill,
+        )
+        expected = F.to_image_tensor(
+            F.affine(
+                F.to_image_pil(image),
+                angle=angle,
+                translate=translate,
+                scale=scale,
+                shear=shear,
+                center=center,
+                interpolation=interpolation,
+                fill=fill,
+            )
+        )
+
+        mae = (actual.float() - expected.float()).abs().mean()
+        assert mae < (2 if interpolation is transforms.InterpolationMode.NEAREST else 8)
+
+    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
+    @pytest.mark.parametrize(
+        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
+    )
+    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
+    @pytest.mark.parametrize("seed", list(range(5)))
+    def test_transform_image_correctness(self, center, interpolation, fill, seed):
+        image = make_image(dtype=torch.uint8, device="cpu")
+
+        fill = adapt_fill(fill, dtype=torch.uint8)
+
+        transform = transforms.RandomAffine(
+            **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, center=center, interpolation=interpolation, fill=fill
+        )
+
+        torch.manual_seed(seed)
+        actual = transform(image)
+
+        torch.manual_seed(seed)
+        expected = F.to_image_tensor(transform(F.to_image_pil(image)))
+
+        mae = (actual.float() - expected.float()).abs().mean()
+        assert mae < (2 if interpolation is transforms.InterpolationMode.NEAREST else 8)
+
+    def _compute_affine_matrix(self, *, angle, translate, scale, shear, center):
+        rot = math.radians(angle)
+        cx, cy = center
+        tx, ty = translate
+        sx, sy = [math.radians(s) for s in ([shear, 0.0] if isinstance(shear, (int, float)) else shear)]
+
+        c_matrix = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]])
+        t_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
+        c_matrix_inv = np.linalg.inv(c_matrix)
+        rs_matrix = np.array(
+            [
+                [scale * math.cos(rot), -scale * math.sin(rot), 0],
+                [scale * math.sin(rot), scale * math.cos(rot), 0],
+                [0, 0, 1],
+            ]
+        )
+        shear_x_matrix = np.array([[1, -math.tan(sx), 0], [0, 1, 0], [0, 0, 1]])
+        shear_y_matrix = np.array([[1, 0, 0], [-math.tan(sy), 1, 0], [0, 0, 1]])
+        rss_matrix = np.matmul(rs_matrix, np.matmul(shear_y_matrix, shear_x_matrix))
+        true_matrix = np.matmul(t_matrix, np.matmul(c_matrix, np.matmul(rss_matrix, c_matrix_inv)))
+        return true_matrix
+
+    def _reference_affine_bounding_box(self, bounding_box, *, angle, translate, scale, shear, center):
+        if center is None:
+            center = [s * 0.5 for s in bounding_box.spatial_size[::-1]]
+
+        affine_matrix = self._compute_affine_matrix(
+            angle=angle, translate=translate, scale=scale, shear=shear, center=center
+        )
+        affine_matrix = affine_matrix[:2, :]
+
+        expected_bboxes = reference_affine_bounding_box_helper(
+            bounding_box,
+            format=bounding_box.format,
+            spatial_size=bounding_box.spatial_size,
+            affine_matrix=affine_matrix,
+        )
+
+        return expected_bboxes
+
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
+    @pytest.mark.parametrize("translate", _CORRECTNESS_AFFINE_KWARGS["translate"])
+    @pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"])
+    @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"])
+
@pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + def test_functional_bounding_box_correctness(self, format, angle, translate, scale, shear, center): + bounding_box = make_bounding_box(format=format) + + actual = F.affine( + bounding_box, + angle=angle, + translate=translate, + scale=scale, + shear=shear, + center=center, + ) + expected = self._reference_affine_bounding_box( + bounding_box, + angle=angle, + translate=translate, + scale=scale, + shear=shear, + center=center, + ) + + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + @pytest.mark.parametrize("seed", list(range(5))) + def test_transform_bounding_box_correctness(self, format, center, seed): + bounding_box = make_bounding_box(format=format) + + transform = transforms.RandomAffine(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, center=center) + + torch.manual_seed(seed) + params = transform._get_params([bounding_box]) + + torch.manual_seed(seed) + actual = transform(bounding_box) + + expected = self._reference_affine_bounding_box(bounding_box, **params, center=center) + + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize("degrees", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["degrees"]) + @pytest.mark.parametrize("translate", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["translate"]) + @pytest.mark.parametrize("scale", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["scale"]) + @pytest.mark.parametrize("shear", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["shear"]) + @pytest.mark.parametrize("seed", list(range(10))) + def test_transform_get_params_bounds(self, degrees, translate, scale, shear, seed): + image = make_image() + height, width = F.get_spatial_size(image) + + transform = transforms.RandomAffine(degrees=degrees, translate=translate, scale=scale, shear=shear) + + torch.manual_seed(seed) + params = transform._get_params([image]) + + if isinstance(degrees, (int, float)): + assert -degrees <= params["angle"] <= degrees + else: + assert degrees[0] <= params["angle"] <= degrees[1] + + if translate is not None: + width_max = int(round(translate[0] * width)) + height_max = int(round(translate[1] * height)) + assert -width_max <= params["translate"][0] <= width_max + assert -height_max <= params["translate"][1] <= height_max + else: + assert params["translate"] == (0, 0) + + if scale is not None: + assert scale[0] <= params["scale"] <= scale[1] + else: + assert params["scale"] == 1.0 + + if shear is not None: + if isinstance(shear, (int, float)): + assert -shear <= params["shear"][0] <= shear + assert params["shear"][1] == 0.0 + elif len(shear) == 2: + assert shear[0] <= params["shear"][0] <= shear[1] + assert params["shear"][1] == 0.0 + elif len(shear) == 4: + assert shear[0] <= params["shear"][0] <= shear[1] + assert shear[2] <= params["shear"][1] <= shear[3] + else: + assert params["shear"] == (0, 0) + + @pytest.mark.parametrize("param", ["degrees", "translate", "scale", "shear", "center"]) + @pytest.mark.parametrize("value", [0, [0], [0, 0, 0]]) + def test_transform_sequence_len_errors(self, param, value): + if param in {"degrees", "shear"} and not isinstance(value, list): + return + + kwargs = {param: value} + if param != "degrees": + kwargs["degrees"] = 0 + + with pytest.raises( + ValueError if isinstance(value, list) else TypeError, match=f"{param} should be a sequence of length 2" + ): + transforms.RandomAffine(**kwargs) + + def 
test_transform_negative_degrees_error(self): + with pytest.raises(ValueError, match="If degrees is a single number, it must be positive"): + transforms.RandomAffine(degrees=-1) + + @pytest.mark.parametrize("translate", [[-1, 0], [2, 0], [-1, 2]]) + def test_transform_translate_range_error(self, translate): + with pytest.raises(ValueError, match="translation values should be between 0 and 1"): + transforms.RandomAffine(degrees=0, translate=translate) + + @pytest.mark.parametrize("scale", [[-1, 0], [0, -1], [-1, -1]]) + def test_transform_scale_range_error(self, scale): + with pytest.raises(ValueError, match="scale values should be positive"): + transforms.RandomAffine(degrees=0, scale=scale) + + def test_transform_negative_shear_error(self): + with pytest.raises(ValueError, match="If shear is a single number, it must be positive"): + transforms.RandomAffine(degrees=0, shear=-1) + + def test_transform_unknown_fill_error(self): + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.RandomAffine(degrees=0, fill="fill") + + +class TestVerticalFlip: + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_tensor(self, dtype, device): + check_kernel(F.vertical_flip_image_tensor, make_image(dtype=dtype, device=device)) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_box(self, format, dtype, device): + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) + check_kernel( + F.vertical_flip_bounding_box, + bounding_box, + format=format, + spatial_size=bounding_box.spatial_size, + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.vertical_flip_mask, make_mask()) + + def test_kernel_video(self): + check_kernel(F.vertical_flip_video, make_video()) + + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.vertical_flip_image_tensor, make_image_tensor), + (F.vertical_flip_image_pil, make_image_pil), + (F.vertical_flip_image_tensor, make_image), + (F.vertical_flip_bounding_box, make_bounding_box), + (F.vertical_flip_mask, make_segmentation_mask), + (F.vertical_flip_video, make_video), + ], + ) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.vertical_flip, kernel, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.vertical_flip_image_tensor, torch.Tensor), + (F.vertical_flip_image_pil, PIL.Image.Image), + (F.vertical_flip_image_tensor, datapoints.Image), + (F.vertical_flip_bounding_box, datapoints.BoundingBox), + (F.vertical_flip_mask, datapoints.Mask), + (F.vertical_flip_video, datapoints.Video), + ], + ) + def test_dispatcher_signature(self, kernel, input_type): + check_dispatcher_signatures_match(F.vertical_flip, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + check_transform(transforms.RandomVerticalFlip, make_input(device=device), p=1) + + @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) + def test_image_correctness(self, fn): + image 
= make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image) + expected = F.to_image_tensor(F.vertical_flip(F.to_image_pil(image))) + + torch.testing.assert_close(actual, expected) + + def _reference_vertical_flip_bounding_box(self, bounding_box): + affine_matrix = np.array( + [ + [1, 0, 0], + [0, -1, bounding_box.spatial_size[0]], + ], + dtype="float64" if bounding_box.dtype == torch.float64 else "float32", + ) + + expected_bboxes = reference_affine_bounding_box_helper( + bounding_box, + format=bounding_box.format, + spatial_size=bounding_box.spatial_size, + affine_matrix=affine_matrix, + ) + + return datapoints.BoundingBox.wrap_like(bounding_box, expected_bboxes) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) + def test_bounding_box_correctness(self, format, fn): + bounding_box = make_bounding_box(format=format) + + actual = fn(bounding_box) + expected = self._reference_vertical_flip_bounding_box(bounding_box) + + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform_noop(self, make_input, device): + input = make_input(device=device) + + transform = transforms.RandomVerticalFlip(p=0) + + output = transform(input) + + assert_equal(output, input) + + +class TestRotate: + _EXHAUSTIVE_TYPE_AFFINE_KWARGS = dict( + # float, int + angle=[-10.9, 18], + # None + # two-list of float, two-list of int, two-tuple of float, two-tuple of int + center=[None, [1.2, 4.9], [-3, 1], (2.5, -4.7), (3, 2)], + ) + _MINIMAL_AFFINE_KWARGS = {k: vs[0] for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items()} + _CORRECTNESS_AFFINE_KWARGS = { + k: [v for v in vs if v is None or isinstance(v, float) or isinstance(v, list)] + for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items() + } + + _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES = dict( + degrees=[30, (-15, 20)], + ) + _CORRECTNESS_TRANSFORM_AFFINE_RANGES = {k: vs[0] for k, vs in _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES.items()} + + @param_value_parametrization( + angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"], + interpolation=[transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR], + expand=[False, True], + center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], + fill=EXHAUSTIVE_TYPE_FILLS, + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_tensor(self, param, value, dtype, device): + kwargs = {param: value} + if param != "angle": + kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] + check_kernel( + F.rotate_image_tensor, + make_image(dtype=dtype, device=device), + **kwargs, + check_scripted_vs_eager=not (param == "fill" and isinstance(value, (int, float))), + ) + + @param_value_parametrization( + angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"], + expand=[False, True], + center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], + ) + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_box(self, param, value, format, dtype, device): + kwargs = {param: value} + if param != "angle": + kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] + + 
bounding_box = make_bounding_box(format=format, dtype=dtype, device=device)
+
+        check_kernel(
+            F.rotate_bounding_box,
+            bounding_box,
+            format=format,
+            spatial_size=bounding_box.spatial_size,
+            **kwargs,
+        )
+
+    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask])
+    def test_kernel_mask(self, make_mask):
+        check_kernel(F.rotate_mask, make_mask(), **self._MINIMAL_AFFINE_KWARGS)
+
+    def test_kernel_video(self):
+        check_kernel(F.rotate_video, make_video(), **self._MINIMAL_AFFINE_KWARGS)
+
+    @pytest.mark.parametrize(
+        ("kernel", "make_input"),
+        [
+            (F.rotate_image_tensor, make_image_tensor),
+            (F.rotate_image_pil, make_image_pil),
+            (F.rotate_image_tensor, make_image),
+            (F.rotate_bounding_box, make_bounding_box),
+            (F.rotate_mask, make_segmentation_mask),
+            (F.rotate_video, make_video),
+        ],
+    )
+    def test_dispatcher(self, kernel, make_input):
+        check_dispatcher(F.rotate, kernel, make_input(), **self._MINIMAL_AFFINE_KWARGS)
+
+    @pytest.mark.parametrize(
+        ("kernel", "input_type"),
+        [
+            (F.rotate_image_tensor, torch.Tensor),
+            (F.rotate_image_pil, PIL.Image.Image),
+            (F.rotate_image_tensor, datapoints.Image),
+            (F.rotate_bounding_box, datapoints.BoundingBox),
+            (F.rotate_mask, datapoints.Mask),
+            (F.rotate_video, datapoints.Video),
+        ],
+    )
+    def test_dispatcher_signature(self, kernel, input_type):
+        check_dispatcher_signatures_match(F.rotate, kernel=kernel, input_type=input_type)
+
+    @pytest.mark.parametrize(
+        "make_input",
+        [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video],
+    )
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_transform(self, make_input, device):
+        check_transform(
+            transforms.RandomRotation, make_input(device=device), **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES
+        )
+
+    @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
+    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
+    @pytest.mark.parametrize(
+        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
+    )
+    @pytest.mark.parametrize("expand", [False, True])
+    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
+    def test_functional_image_correctness(self, angle, center, interpolation, expand, fill):
+        image = make_image(dtype=torch.uint8, device="cpu")
+
+        fill = adapt_fill(fill, dtype=torch.uint8)
+
+        actual = F.rotate(image, angle=angle, center=center, interpolation=interpolation, expand=expand, fill=fill)
+        expected = F.to_image_tensor(
+            F.rotate(
+                F.to_image_pil(image), angle=angle, center=center, interpolation=interpolation, expand=expand, fill=fill
+            )
+        )
+
+        mae = (actual.float() - expected.float()).abs().mean()
+        assert mae < (1 if interpolation is transforms.InterpolationMode.NEAREST else 6)
+
+    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
+    @pytest.mark.parametrize(
+        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
+    )
+    @pytest.mark.parametrize("expand", [False, True])
+    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
+    @pytest.mark.parametrize("seed", list(range(5)))
+    def test_transform_image_correctness(self, center, interpolation, expand, fill, seed):
+        image = make_image(dtype=torch.uint8, device="cpu")
+
+        fill = adapt_fill(fill, dtype=torch.uint8)
+
+        transform = transforms.RandomRotation(
+            **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES,
+            center=center,
+            interpolation=interpolation,
+            expand=expand,
+            fill=fill,
+        )
+
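+        # The re-seeding pattern below (sketched here with hypothetical values) makes both
+        # invocations draw identical random parameters, so the tensor path and the PIL
+        # reference see the same rotation:
+        #
+        #     torch.manual_seed(0); a = torch.rand(1)
+        #     torch.manual_seed(0); b = torch.rand(1)
+        #     assert torch.equal(a, b)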
+        torch.manual_seed(seed)
+        actual = transform(image)
+
+        torch.manual_seed(seed)
+        expected = F.to_image_tensor(transform(F.to_image_pil(image)))
+
+        mae = (actual.float() - expected.float()).abs().mean()
+        assert mae < (1 if interpolation is transforms.InterpolationMode.NEAREST else 6)
+
+    def _reference_rotate_bounding_box(self, bounding_box, *, angle, expand, center):
+        # FIXME
+        if expand:
+            raise ValueError("This reference currently does not support expand=True")
+
+        if center is None:
+            center = [s * 0.5 for s in bounding_box.spatial_size[::-1]]
+
+        a = np.cos(angle * np.pi / 180.0)
+        b = np.sin(angle * np.pi / 180.0)
+        cx = center[0]
+        cy = center[1]
+        affine_matrix = np.array(
+            [
+                [a, b, cx - cx * a - b * cy],
+                [-b, a, cy + cx * b - a * cy],
+            ],
+            dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
+        )
+
+        expected_bboxes = reference_affine_bounding_box_helper(
+            bounding_box,
+            format=bounding_box.format,
+            spatial_size=bounding_box.spatial_size,
+            affine_matrix=affine_matrix,
+        )
+
+        return expected_bboxes
+
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
+    # TODO: add support for expand=True in the reference
+    @pytest.mark.parametrize("expand", [False])
+    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
+    def test_functional_bounding_box_correctness(self, format, angle, expand, center):
+        bounding_box = make_bounding_box(format=format)
+
+        actual = F.rotate(bounding_box, angle=angle, expand=expand, center=center)
+        expected = self._reference_rotate_bounding_box(bounding_box, angle=angle, expand=expand, center=center)
+
+        torch.testing.assert_close(actual, expected)
+
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    # TODO: add support for expand=True in the reference
+    @pytest.mark.parametrize("expand", [False])
+    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
+    @pytest.mark.parametrize("seed", list(range(5)))
+    def test_transform_bounding_box_correctness(self, format, expand, center, seed):
+        bounding_box = make_bounding_box(format=format)
+
+        transform = transforms.RandomRotation(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, expand=expand, center=center)
+
+        torch.manual_seed(seed)
+        params = transform._get_params([bounding_box])
+
+        torch.manual_seed(seed)
+        actual = transform(bounding_box)
+
+        expected = self._reference_rotate_bounding_box(bounding_box, **params, expand=expand, center=center)
+
+        torch.testing.assert_close(actual, expected)
+
+    @pytest.mark.parametrize("degrees", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["degrees"])
+    @pytest.mark.parametrize("seed", list(range(10)))
+    def test_transform_get_params_bounds(self, degrees, seed):
+        transform = transforms.RandomRotation(degrees=degrees)
+
+        torch.manual_seed(seed)
+        params = transform._get_params([])
+
+        if isinstance(degrees, (int, float)):
+            assert -degrees <= params["angle"] <= degrees
+        else:
+            assert degrees[0] <= params["angle"] <= degrees[1]
+
+    @pytest.mark.parametrize("param", ["degrees", "center"])
+    @pytest.mark.parametrize("value", [0, [0], [0, 0, 0]])
+    def test_transform_sequence_len_errors(self, param, value):
+        if param == "degrees" and not isinstance(value, list):
+            return
+
+        kwargs = {param: value}
+        if param != "degrees":
+            kwargs["degrees"] = 0
+
+        with pytest.raises(
+            ValueError if isinstance(value, list) else TypeError, match=f"{param} should be a sequence of length 2"
+        ):
+            transforms.RandomRotation(**kwargs)
+
+    def test_transform_negative_degrees_error(self):
+        with pytest.raises(ValueError, match="If degrees is a single number, it must be positive"):
+            transforms.RandomRotation(degrees=-1)
+
+    def test_transform_unknown_fill_error(self):
+        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
+            transforms.RandomRotation(degrees=0, fill="fill")
diff --git a/test/test_utils.py b/test/test_utils.py
index 32b3db59631..b13bd0f0f5b 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -9,7 +9,7 @@ import torch
 import torchvision.transforms.functional as F
 import torchvision.utils as utils
-from common_utils import assert_equal
+from common_utils import assert_equal, cpu_and_cuda
 from PIL import __version__ as PILLOW_VERSION, Image, ImageColor
@@ -203,12 +203,13 @@ def test_draw_no_boxes():
     ],
 )
 @pytest.mark.parametrize("alpha", (0, 0.5, 0.7, 1))
-def test_draw_segmentation_masks(colors, alpha):
+@pytest.mark.parametrize("device", cpu_and_cuda())
+def test_draw_segmentation_masks(colors, alpha, device):
     """This test makes sure that masks draw their corresponding color where they should"""
     num_masks, h, w = 2, 100, 100
     dtype = torch.uint8
-    img = torch.randint(0, 256, size=(3, h, w), dtype=dtype)
-    masks = torch.randint(0, 2, (num_masks, h, w), dtype=torch.bool)
+    img = torch.randint(0, 256, size=(3, h, w), dtype=dtype, device=device)
+    masks = torch.randint(0, 2, (num_masks, h, w), dtype=torch.bool, device=device)
 
     # For testing we enforce that there's no overlap between the masks. The
     # current behaviour is that the last mask's color will take priority when
@@ -234,7 +235,7 @@ def test_draw_segmentation_masks(colors, alpha):
     for mask, color in zip(masks, colors):
         if isinstance(color, str):
             color = ImageColor.getrgb(color)
-        color = torch.tensor(color, dtype=dtype)
+        color = torch.tensor(color, dtype=dtype, device=device)
 
         if alpha == 1:
             assert (out[:, mask] == color[:, None]).all()
@@ -245,11 +246,12 @@ def test_draw_segmentation_masks(colors, alpha):
             torch.testing.assert_close(out[:, mask], interpolated_color, rtol=0.0, atol=1.0)
 
 
-def test_draw_segmentation_masks_errors():
+@pytest.mark.parametrize("device", cpu_and_cuda())
+def test_draw_segmentation_masks_errors(device):
     h, w = 10, 10
-    masks = torch.randint(0, 2, size=(h, w), dtype=torch.bool)
-    img = torch.randint(0, 256, size=(3, h, w), dtype=torch.uint8)
+    masks = torch.randint(0, 2, size=(h, w), dtype=torch.bool, device=device)
+    img = torch.randint(0, 256, size=(3, h, w), dtype=torch.uint8, device=device)
 
     with pytest.raises(TypeError, match="The image must be a tensor"):
         utils.draw_segmentation_masks(image="Not A Tensor Image", masks=masks)
@@ -281,9 +283,10 @@ def test_draw_segmentation_masks_errors():
         utils.draw_segmentation_masks(image=img, masks=masks, colors=bad_colors)
 
 
-def test_draw_no_segmention_mask():
-    img = torch.full((3, 100, 100), 0, dtype=torch.uint8)
-    masks = torch.full((0, 100, 100), 0, dtype=torch.bool)
+@pytest.mark.parametrize("device", cpu_and_cuda())
+def test_draw_no_segmention_mask(device):
+    img = torch.full((3, 100, 100), 0, dtype=torch.uint8, device=device)
+    masks = torch.full((0, 100, 100), 0, dtype=torch.bool, device=device)
 
     with pytest.warns(UserWarning, match=re.escape("masks doesn't contain any mask. 
No mask was drawn")): res = utils.draw_segmentation_masks(img, masks) # Check that the function didn't change the image diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index 1d9dd025254..6f61526f382 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -138,68 +138,6 @@ def fill_sequence_needs_broadcast(args_kwargs): DISPATCHER_INFOS = [ - DispatcherInfo( - F.horizontal_flip, - kernels={ - datapoints.Image: F.horizontal_flip_image_tensor, - datapoints.Video: F.horizontal_flip_video, - datapoints.BoundingBox: F.horizontal_flip_bounding_box, - datapoints.Mask: F.horizontal_flip_mask, - }, - pil_kernel_info=PILKernelInfo(F.horizontal_flip_image_pil, kernel_name="horizontal_flip_image_pil"), - ), - DispatcherInfo( - F.resize, - kernels={ - datapoints.Image: F.resize_image_tensor, - datapoints.Video: F.resize_video, - datapoints.BoundingBox: F.resize_bounding_box, - datapoints.Mask: F.resize_mask, - }, - pil_kernel_info=PILKernelInfo(F.resize_image_pil), - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), - DispatcherInfo( - F.affine, - kernels={ - datapoints.Image: F.affine_image_tensor, - datapoints.Video: F.affine_video, - datapoints.BoundingBox: F.affine_bounding_box, - datapoints.Mask: F.affine_mask, - }, - pil_kernel_info=PILKernelInfo(F.affine_image_pil), - test_marks=[ - *xfails_pil_if_fill_sequence_needs_broadcast, - xfail_jit_python_scalar_arg("shear"), - xfail_jit_python_scalar_arg("fill"), - ], - ), - DispatcherInfo( - F.vertical_flip, - kernels={ - datapoints.Image: F.vertical_flip_image_tensor, - datapoints.Video: F.vertical_flip_video, - datapoints.BoundingBox: F.vertical_flip_bounding_box, - datapoints.Mask: F.vertical_flip_mask, - }, - pil_kernel_info=PILKernelInfo(F.vertical_flip_image_pil, kernel_name="vertical_flip_image_pil"), - ), - DispatcherInfo( - F.rotate, - kernels={ - datapoints.Image: F.rotate_image_tensor, - datapoints.Video: F.rotate_video, - datapoints.BoundingBox: F.rotate_bounding_box, - datapoints.Mask: F.rotate_mask, - }, - pil_kernel_info=PILKernelInfo(F.rotate_image_pil), - test_marks=[ - xfail_jit_python_scalar_arg("fill"), - *xfails_pil_if_fill_sequence_needs_broadcast, - ], - ), DispatcherInfo( F.crop, kernels={ diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index 7b877fb092d..dc04fbfc7a9 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -1,7 +1,6 @@ import decimal import functools import itertools -import math import numpy as np import PIL.Image @@ -12,6 +11,7 @@ from common_utils import ( ArgsKwargs, combinations_grid, + DEFAULT_PORTRAIT_SPATIAL_SIZE, get_num_channels, ImageLoader, InfoBase, @@ -156,301 +156,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None): KERNEL_INFOS = [] -def sample_inputs_horizontal_flip_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], dtypes=[torch.float32]): - yield ArgsKwargs(image_loader) - - -def reference_inputs_horizontal_flip_image_tensor(): - for image_loader in make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]): - yield ArgsKwargs(image_loader) - - -def sample_inputs_horizontal_flip_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders( - formats=[datapoints.BoundingBoxFormat.XYXY], dtypes=[torch.float32] - ): - yield ArgsKwargs( - bounding_box_loader, format=bounding_box_loader.format, spatial_size=bounding_box_loader.spatial_size - ) - - -def sample_inputs_horizontal_flip_mask(): 
- for image_loader in make_mask_loaders(sizes=["random"], dtypes=[torch.uint8]): - yield ArgsKwargs(image_loader) - - -def sample_inputs_horizontal_flip_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): - yield ArgsKwargs(video_loader) - - -def reference_horizontal_flip_bounding_box(bounding_box, *, format, spatial_size): - affine_matrix = np.array( - [ - [-1, 0, spatial_size[1]], - [0, 1, 0], - ], - dtype="float64" if bounding_box.dtype == torch.float64 else "float32", - ) - - expected_bboxes = reference_affine_bounding_box_helper( - bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix - ) - - return expected_bboxes - - -def reference_inputs_flip_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders(extra_dims=[()]): - yield ArgsKwargs( - bounding_box_loader, - format=bounding_box_loader.format, - spatial_size=bounding_box_loader.spatial_size, - ) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.horizontal_flip_image_tensor, - kernel_name="horizontal_flip_image_tensor", - sample_inputs_fn=sample_inputs_horizontal_flip_image_tensor, - reference_fn=pil_reference_wrapper(F.horizontal_flip_image_pil), - reference_inputs_fn=reference_inputs_horizontal_flip_image_tensor, - float32_vs_uint8=True, - ), - KernelInfo( - F.horizontal_flip_bounding_box, - sample_inputs_fn=sample_inputs_horizontal_flip_bounding_box, - reference_fn=reference_horizontal_flip_bounding_box, - reference_inputs_fn=reference_inputs_flip_bounding_box, - ), - KernelInfo( - F.horizontal_flip_mask, - sample_inputs_fn=sample_inputs_horizontal_flip_mask, - ), - KernelInfo( - F.horizontal_flip_video, - sample_inputs_fn=sample_inputs_horizontal_flip_video, - ), - ] -) - - -def _get_resize_sizes(spatial_size): - height, width = spatial_size - length = max(spatial_size) - yield length - yield [length] - yield (length,) - new_height = int(height * 0.75) - new_width = int(width * 1.25) - yield [new_height, new_width] - yield height, width - - -def sample_inputs_resize_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32]): - for size in _get_resize_sizes(image_loader.spatial_size): - yield ArgsKwargs(image_loader, size=size) - - for image_loader, interpolation in itertools.product( - make_image_loaders(sizes=["random"], color_spaces=["RGB"]), - [F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR], - ): - yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation) - - yield ArgsKwargs(make_image_loader(size=(11, 17)), size=20, max_size=25) - - -def sample_inputs_resize_image_tensor_bicubic(): - for image_loader, interpolation in itertools.product( - make_image_loaders(sizes=["random"], color_spaces=["RGB"]), [F.InterpolationMode.BICUBIC] - ): - yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation) - - -@pil_reference_wrapper -def reference_resize_image_tensor(*args, **kwargs): - if not kwargs.pop("antialias", False) and kwargs.get("interpolation", F.InterpolationMode.BILINEAR) in { - F.InterpolationMode.BILINEAR, - F.InterpolationMode.BICUBIC, - }: - raise pytest.UsageError("Anti-aliasing is always active in PIL") - return F.resize_image_pil(*args, **kwargs) - - -def reference_inputs_resize_image_tensor(): - for image_loader, interpolation in itertools.product( - make_image_loaders_for_interpolation(), - [ - F.InterpolationMode.NEAREST, - F.InterpolationMode.NEAREST_EXACT, - 
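The removed `reference_horizontal_flip_bounding_box` encodes the flip as the affine map x' = W - x, i.e. the matrix [[-1, 0, W], [0, 1, 0]]. A standalone sketch of that pattern (not the suite's `reference_affine_bounding_box_helper`; `flip_boxes_xyxy` is a hypothetical name), assuming NumPy and XYXY boxes:

```python
import numpy as np

def flip_boxes_xyxy(boxes, spatial_size):  # hypothetical standalone helper
    height, width = spatial_size
    affine = np.array([[-1.0, 0.0, width], [0.0, 1.0, 0.0]])
    x1, y1, x2, y2 = boxes.T
    corners = np.stack([x1, y1, x2, y1, x2, y2, x1, y2], axis=-1).reshape(-1, 4, 2)
    ones = np.ones((*corners.shape[:2], 1))
    transformed = np.concatenate([corners, ones], axis=-1) @ affine.T
    # flipping swaps the left and right edges, so re-canonicalize with min/max
    return np.concatenate([transformed.min(axis=1), transformed.max(axis=1)], axis=-1)

boxes = np.array([[2.0, 3.0, 6.0, 8.0]])
print(flip_boxes_xyxy(boxes, spatial_size=(10, 10)))  # [[4. 3. 8. 8.]]
```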
F.InterpolationMode.BILINEAR, - F.InterpolationMode.BICUBIC, - ], - ): - for size in _get_resize_sizes(image_loader.spatial_size): - yield ArgsKwargs( - image_loader, - size=size, - interpolation=interpolation, - antialias=interpolation - in { - F.InterpolationMode.BILINEAR, - F.InterpolationMode.BICUBIC, - }, - ) - - -def sample_inputs_resize_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders(): - for size in _get_resize_sizes(bounding_box_loader.spatial_size): - yield ArgsKwargs(bounding_box_loader, spatial_size=bounding_box_loader.spatial_size, size=size) - - -def sample_inputs_resize_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): - yield ArgsKwargs(mask_loader, size=[min(mask_loader.shape[-2:]) + 1]) - - -def sample_inputs_resize_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): - yield ArgsKwargs(video_loader, size=[min(video_loader.shape[-2:]) + 1]) - - -def reference_resize_bounding_box(bounding_box, *, spatial_size, size, max_size=None): - old_height, old_width = spatial_size - new_height, new_width = F._geometry._compute_resized_output_size(spatial_size, size=size, max_size=max_size) - - if (old_height, old_width) == (new_height, new_width): - return bounding_box, (old_height, old_width) - - affine_matrix = np.array( - [ - [new_width / old_width, 0, 0], - [0, new_height / old_height, 0], - ], - dtype="float64" if bounding_box.dtype == torch.float64 else "float32", - ) - - expected_bboxes = reference_affine_bounding_box_helper( - bounding_box, - format=bounding_box.format, - spatial_size=(new_height, new_width), - affine_matrix=affine_matrix, - ) - return expected_bboxes, (new_height, new_width) - - -def reference_inputs_resize_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders(extra_dims=((), (4,))): - for size in _get_resize_sizes(bounding_box_loader.spatial_size): - yield ArgsKwargs(bounding_box_loader, size=size, spatial_size=bounding_box_loader.spatial_size) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.resize_image_tensor, - sample_inputs_fn=sample_inputs_resize_image_tensor, - reference_fn=reference_resize_image_tensor, - reference_inputs_fn=reference_inputs_resize_image_tensor, - float32_vs_uint8=True, - closeness_kwargs={ - **pil_reference_pixel_difference(10, mae=True), - **cuda_vs_cpu_pixel_difference(), - **float32_vs_uint8_pixel_difference(1, mae=True), - }, - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), - KernelInfo( - F.resize_image_tensor, - sample_inputs_fn=sample_inputs_resize_image_tensor_bicubic, - reference_fn=reference_resize_image_tensor, - reference_inputs_fn=reference_inputs_resize_image_tensor, - float32_vs_uint8=True, - closeness_kwargs={ - **pil_reference_pixel_difference(10, mae=True), - **cuda_vs_cpu_pixel_difference(atol=30), - **float32_vs_uint8_pixel_difference(1, mae=True), - }, - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), - KernelInfo( - F.resize_bounding_box, - sample_inputs_fn=sample_inputs_resize_bounding_box, - reference_fn=reference_resize_bounding_box, - reference_inputs_fn=reference_inputs_resize_bounding_box, - closeness_kwargs={ - (("TestKernels", "test_against_reference"), torch.int64, "cpu"): dict(atol=1, rtol=0), - }, - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), - KernelInfo( - F.resize_mask, - sample_inputs_fn=sample_inputs_resize_mask, - closeness_kwargs=pil_reference_pixel_difference(10), - test_marks=[ - 
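`_get_resize_sizes` exercises every accepted spelling of `size`: a bare int or 1-element sequence means "match the shorter edge, keep the aspect ratio", optionally capped by `max_size`. A simplified sketch of the rule `_compute_resized_output_size` applies (hypothetical standalone helper; validation and rounding edge cases omitted), reproducing the `size=20, max_size=25` sample on an (11, 17) image:

```python
def resized_output_size(spatial_size, size, max_size=None):
    height, width = spatial_size
    if isinstance(size, int) or len(size) == 1:  # shorter-edge mode
        short = size if isinstance(size, int) else size[0]
        long_ = int(short * max(height, width) / min(height, width))
        if max_size is not None and long_ > max_size:
            # shrink so the longer edge fits under max_size
            short = int(short * max_size / long_)
            long_ = max_size
        return (short, long_) if height < width else (long_, short)
    return tuple(size)  # explicit (new_height, new_width)

print(resized_output_size((11, 17), 20, max_size=25))  # (16, 25)
```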
xfail_jit_python_scalar_arg("size"), - ], - ), - KernelInfo( - F.resize_video, - sample_inputs_fn=sample_inputs_resize_video, - closeness_kwargs=cuda_vs_cpu_pixel_difference(), - ), - ] -) - - -_AFFINE_KWARGS = combinations_grid( - angle=[-87, 15, 90], - translate=[(5, 5), (-5, -5)], - scale=[0.77, 1.27], - shear=[(12, 12), (0, 0)], -) - - -def _diversify_affine_kwargs_types(affine_kwargs): - angle = affine_kwargs["angle"] - for diverse_angle in [int(angle), float(angle)]: - yield dict(affine_kwargs, angle=diverse_angle) - - shear = affine_kwargs["shear"] - for diverse_shear in [tuple(shear), list(shear), int(shear[0]), float(shear[0])]: - yield dict(affine_kwargs, shear=diverse_shear) - - -def _full_affine_params(**partial_params): - partial_params.setdefault("angle", 0.0) - partial_params.setdefault("translate", [0.0, 0.0]) - partial_params.setdefault("scale", 1.0) - partial_params.setdefault("shear", [0.0, 0.0]) - partial_params.setdefault("center", None) - return partial_params - - -_DIVERSE_AFFINE_PARAMS = [ - _full_affine_params(**{name: arg}) - for name, args in [ - ("angle", [1.0, 2]), - ("translate", [[1.0, 0.5], [1, 2], (1.0, 0.5), (1, 2)]), - ("scale", [0.5]), - ("shear", [1.0, 2, [1.0], [2], (1.0,), (2,), [1.0, 0.5], [1, 2], (1.0, 0.5), (1, 2)]), - ("center", [None, [1.0, 0.5], [1, 2], (1.0, 0.5), (1, 2)]), - ] - for arg in args -] - - def get_fills(*, num_channels, dtype): yield None @@ -481,72 +186,6 @@ def float32_vs_uint8_fill_adapter(other_args, kwargs): return other_args, dict(kwargs, fill=fill) -def sample_inputs_affine_image_tensor(): - make_affine_image_loaders = functools.partial( - make_image_loaders, sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32] - ) - - for image_loader, affine_params in itertools.product(make_affine_image_loaders(), _DIVERSE_AFFINE_PARAMS): - yield ArgsKwargs(image_loader, **affine_params) - - for image_loader in make_affine_image_loaders(): - for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): - yield ArgsKwargs(image_loader, **_full_affine_params(), fill=fill) - - for image_loader, interpolation in itertools.product( - make_affine_image_loaders(), - [ - F.InterpolationMode.NEAREST, - F.InterpolationMode.BILINEAR, - ], - ): - yield ArgsKwargs(image_loader, **_full_affine_params(), fill=0) - - -def reference_inputs_affine_image_tensor(): - for image_loader, affine_kwargs in itertools.product(make_image_loaders_for_interpolation(), _AFFINE_KWARGS): - yield ArgsKwargs( - image_loader, - interpolation=F.InterpolationMode.NEAREST, - **affine_kwargs, - ) - - -def sample_inputs_affine_bounding_box(): - for bounding_box_loader, affine_params in itertools.product( - make_bounding_box_loaders(formats=[datapoints.BoundingBoxFormat.XYXY]), _DIVERSE_AFFINE_PARAMS - ): - yield ArgsKwargs( - bounding_box_loader, - format=bounding_box_loader.format, - spatial_size=bounding_box_loader.spatial_size, - **affine_params, - ) - - -def _compute_affine_matrix(angle, translate, scale, shear, center): - rot = math.radians(angle) - cx, cy = center - tx, ty = translate - sx, sy = [math.radians(sh_) for sh_ in shear] - - c_matrix = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]]) - t_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) - c_matrix_inv = np.linalg.inv(c_matrix) - rs_matrix = np.array( - [ - [scale * math.cos(rot), -scale * math.sin(rot), 0], - [scale * math.sin(rot), scale * math.cos(rot), 0], - [0, 0, 1], - ] - ) - shear_x_matrix = np.array([[1, -math.tan(sx), 0], [0, 1, 0], [0, 0, 1]]) - shear_y_matrix = 
np.array([[1, 0, 0], [-math.tan(sy), 1, 0], [0, 0, 1]]) - rss_matrix = np.matmul(rs_matrix, np.matmul(shear_y_matrix, shear_x_matrix)) - true_matrix = np.matmul(t_matrix, np.matmul(c_matrix, np.matmul(rss_matrix, c_matrix_inv))) - return true_matrix - - def reference_affine_bounding_box_helper(bounding_box, *, format, spatial_size, affine_matrix): def transform(bbox, affine_matrix_, format_, spatial_size_): # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1 @@ -597,81 +236,6 @@ def transform(bbox, affine_matrix_, format_, spatial_size_): return expected_bboxes -def reference_affine_bounding_box(bounding_box, *, format, spatial_size, angle, translate, scale, shear, center=None): - if center is None: - center = [s * 0.5 for s in spatial_size[::-1]] - - affine_matrix = _compute_affine_matrix(angle, translate, scale, shear, center) - affine_matrix = affine_matrix[:2, :] - - expected_bboxes = reference_affine_bounding_box_helper( - bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix - ) - - return expected_bboxes - - -def reference_inputs_affine_bounding_box(): - for bounding_box_loader, affine_kwargs in itertools.product( - make_bounding_box_loaders(extra_dims=[()]), - _AFFINE_KWARGS, - ): - yield ArgsKwargs( - bounding_box_loader, - format=bounding_box_loader.format, - spatial_size=bounding_box_loader.spatial_size, - **affine_kwargs, - ) - - -def sample_inputs_affine_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): - yield ArgsKwargs(mask_loader, **_full_affine_params()) - - -def sample_inputs_affine_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): - yield ArgsKwargs(video_loader, **_full_affine_params()) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.affine_image_tensor, - sample_inputs_fn=sample_inputs_affine_image_tensor, - reference_fn=pil_reference_wrapper(F.affine_image_pil), - reference_inputs_fn=reference_inputs_affine_image_tensor, - float32_vs_uint8=True, - closeness_kwargs=pil_reference_pixel_difference(10, mae=True), - test_marks=[ - xfail_jit_python_scalar_arg("shear"), - xfail_jit_python_scalar_arg("fill"), - ], - ), - KernelInfo( - F.affine_bounding_box, - sample_inputs_fn=sample_inputs_affine_bounding_box, - reference_fn=reference_affine_bounding_box, - reference_inputs_fn=reference_inputs_affine_bounding_box, - test_marks=[ - xfail_jit_python_scalar_arg("shear"), - ], - ), - KernelInfo( - F.affine_mask, - sample_inputs_fn=sample_inputs_affine_mask, - test_marks=[ - xfail_jit_python_scalar_arg("shear"), - ], - ), - KernelInfo( - F.affine_video, - sample_inputs_fn=sample_inputs_affine_video, - ), - ] -) - - def sample_inputs_convert_format_bounding_box(): formats = list(datapoints.BoundingBoxFormat) for bounding_box_loader, new_format in itertools.product(make_bounding_box_loaders(formats=formats), formats): @@ -697,205 +261,13 @@ def reference_inputs_convert_format_bounding_box(): reference_fn=reference_convert_format_bounding_box, reference_inputs_fn=reference_inputs_convert_format_bounding_box, logs_usage=True, + closeness_kwargs={ + (("TestKernels", "test_against_reference"), torch.int64, "cpu"): dict(atol=1, rtol=0), + }, ), ) -def sample_inputs_vertical_flip_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], dtypes=[torch.float32]): - yield ArgsKwargs(image_loader) - - -def reference_inputs_vertical_flip_image_tensor(): - for image_loader in 
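The removed `_compute_affine_matrix` composes T @ C @ (RS @ ShearY @ ShearX) @ C^-1: shear, then rotate-scale about `center`, then translate. A re-derivation sketch under the same conventions (`compose_affine` is a hypothetical name, NumPy only) with a fixed-point sanity check: with no translation, `center` must map to itself.

```python
import math
import numpy as np

def compose_affine(angle, translate, scale, shear, center):
    rot = math.radians(angle)
    sx, sy = (math.radians(s) for s in shear)
    cx, cy = center
    tx, ty = translate
    C = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]], dtype=np.float64)
    T = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]], dtype=np.float64)
    RS = np.array(
        [
            [scale * math.cos(rot), -scale * math.sin(rot), 0],
            [scale * math.sin(rot), scale * math.cos(rot), 0],
            [0, 0, 1],
        ]
    )
    shear_x = np.array([[1, -math.tan(sx), 0], [0, 1, 0], [0, 0, 1]])
    shear_y = np.array([[1, 0, 0], [-math.tan(sy), 1, 0], [0, 0, 1]])
    return T @ C @ RS @ shear_y @ shear_x @ np.linalg.inv(C)

M = compose_affine(angle=30, translate=(0, 0), scale=1.0, shear=(0, 0), center=(5, 7))
print(np.allclose(M @ np.array([5, 7, 1]), [5, 7, 1]))  # True: center is fixed
```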
make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]): - yield ArgsKwargs(image_loader) - - -def sample_inputs_vertical_flip_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders( - formats=[datapoints.BoundingBoxFormat.XYXY], dtypes=[torch.float32] - ): - yield ArgsKwargs( - bounding_box_loader, format=bounding_box_loader.format, spatial_size=bounding_box_loader.spatial_size - ) - - -def sample_inputs_vertical_flip_mask(): - for image_loader in make_mask_loaders(sizes=["random"], dtypes=[torch.uint8]): - yield ArgsKwargs(image_loader) - - -def sample_inputs_vertical_flip_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): - yield ArgsKwargs(video_loader) - - -def reference_vertical_flip_bounding_box(bounding_box, *, format, spatial_size): - affine_matrix = np.array( - [ - [1, 0, 0], - [0, -1, spatial_size[0]], - ], - dtype="float64" if bounding_box.dtype == torch.float64 else "float32", - ) - - expected_bboxes = reference_affine_bounding_box_helper( - bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix - ) - - return expected_bboxes - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.vertical_flip_image_tensor, - kernel_name="vertical_flip_image_tensor", - sample_inputs_fn=sample_inputs_vertical_flip_image_tensor, - reference_fn=pil_reference_wrapper(F.vertical_flip_image_pil), - reference_inputs_fn=reference_inputs_vertical_flip_image_tensor, - float32_vs_uint8=True, - ), - KernelInfo( - F.vertical_flip_bounding_box, - sample_inputs_fn=sample_inputs_vertical_flip_bounding_box, - reference_fn=reference_vertical_flip_bounding_box, - reference_inputs_fn=reference_inputs_flip_bounding_box, - ), - KernelInfo( - F.vertical_flip_mask, - sample_inputs_fn=sample_inputs_vertical_flip_mask, - ), - KernelInfo( - F.vertical_flip_video, - sample_inputs_fn=sample_inputs_vertical_flip_video, - ), - ] -) - -_ROTATE_ANGLES = [-87, 15, 90] - - -def sample_inputs_rotate_image_tensor(): - make_rotate_image_loaders = functools.partial( - make_image_loaders, sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32] - ) - - for image_loader in make_rotate_image_loaders(): - yield ArgsKwargs(image_loader, angle=15.0, expand=True) - - for image_loader, center in itertools.product( - make_rotate_image_loaders(), [None, [1.0, 0.5], [1, 2], (1.0, 0.5), (1, 2)] - ): - yield ArgsKwargs(image_loader, angle=15.0, center=center) - - for image_loader in make_rotate_image_loaders(): - for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): - yield ArgsKwargs(image_loader, angle=15.0, fill=fill) - - for image_loader, interpolation in itertools.product( - make_rotate_image_loaders(), - [F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR], - ): - yield ArgsKwargs(image_loader, angle=15.0, fill=0) - - -def reference_inputs_rotate_image_tensor(): - for image_loader, angle in itertools.product(make_image_loaders_for_interpolation(), _ROTATE_ANGLES): - yield ArgsKwargs(image_loader, angle=angle) - - -def sample_inputs_rotate_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders(): - yield ArgsKwargs( - bounding_box_loader, - format=bounding_box_loader.format, - spatial_size=bounding_box_loader.spatial_size, - angle=_ROTATE_ANGLES[0], - ) - - -def reference_inputs_rotate_bounding_box(): - for bounding_box_loader, angle in itertools.product( - make_bounding_box_loaders(extra_dims=((), (4,))), _ROTATE_ANGLES - ): - yield ArgsKwargs( - bounding_box_loader, - 
format=bounding_box_loader.format, - spatial_size=bounding_box_loader.spatial_size, - angle=angle, - ) - - # TODO: add samples with expand=True and center - - -def reference_rotate_bounding_box(bounding_box, *, format, spatial_size, angle, expand=False, center=None): - - if center is None: - center = [spatial_size[1] * 0.5, spatial_size[0] * 0.5] - - a = np.cos(angle * np.pi / 180.0) - b = np.sin(angle * np.pi / 180.0) - cx = center[0] - cy = center[1] - affine_matrix = np.array( - [ - [a, b, cx - cx * a - b * cy], - [-b, a, cy + cx * b - a * cy], - ], - dtype="float64" if bounding_box.dtype == torch.float64 else "float32", - ) - - expected_bboxes = reference_affine_bounding_box_helper( - bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix - ) - return expected_bboxes, spatial_size - - -def sample_inputs_rotate_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): - yield ArgsKwargs(mask_loader, angle=15.0) - - -def sample_inputs_rotate_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): - yield ArgsKwargs(video_loader, angle=15.0) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.rotate_image_tensor, - sample_inputs_fn=sample_inputs_rotate_image_tensor, - reference_fn=pil_reference_wrapper(F.rotate_image_pil), - reference_inputs_fn=reference_inputs_rotate_image_tensor, - float32_vs_uint8=True, - closeness_kwargs=pil_reference_pixel_difference(1, mae=True), - test_marks=[ - xfail_jit_python_scalar_arg("fill"), - ], - ), - KernelInfo( - F.rotate_bounding_box, - sample_inputs_fn=sample_inputs_rotate_bounding_box, - reference_fn=reference_rotate_bounding_box, - reference_inputs_fn=reference_inputs_rotate_bounding_box, - closeness_kwargs={ - **scripted_vs_eager_float64_tolerances("cpu", atol=1e-4, rtol=1e-4), - **scripted_vs_eager_float64_tolerances("cuda", atol=1e-4, rtol=1e-4), - }, - ), - KernelInfo( - F.rotate_mask, - sample_inputs_fn=sample_inputs_rotate_mask, - ), - KernelInfo( - F.rotate_video, - sample_inputs_fn=sample_inputs_rotate_video, - ), - ] -) - _CROP_PARAMS = combinations_grid(top=[-8, 0, 9], left=[-8, 0, 9], height=[12, 20], width=[12, 20]) @@ -928,7 +300,7 @@ def sample_inputs_crop_bounding_box(): def sample_inputs_crop_mask(): - for mask_loader in make_mask_loaders(sizes=[(16, 17)], num_categories=["random"], num_objects=["random"]): + for mask_loader in make_mask_loaders(sizes=[(16, 17)], num_categories=[10], num_objects=[5]): yield ArgsKwargs(mask_loader, top=4, left=3, height=7, width=8) @@ -938,7 +310,7 @@ def reference_inputs_crop_mask(): def sample_inputs_crop_video(): - for video_loader in make_video_loaders(sizes=[(16, 17)], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[(16, 17)], num_frames=[3]): yield ArgsKwargs(video_loader, top=4, left=3, height=7, width=8) @@ -1047,7 +419,7 @@ def sample_inputs_resized_crop_mask(): def sample_inputs_resized_crop_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, **_RESIZED_CROP_PARAMS[0]) @@ -1089,7 +461,7 @@ def sample_inputs_resized_crop_video(): def sample_inputs_pad_image_tensor(): make_pad_image_loaders = functools.partial( - make_image_loaders, sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32] + make_image_loaders, sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], 
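The 2x3 matrix in `reference_rotate_bounding_box` above is rotation about `center` written in closed form: with R = [[a, b], [-b, a]] (a = cos(angle), b = sin(angle), counter-clockwise in image coordinates where y points down), the translation column is c - R @ c. A sketch checking the closed form against that composition (`rotate_matrix` is a hypothetical helper, NumPy only):

```python
import numpy as np

def rotate_matrix(angle_deg, center):
    a = np.cos(np.deg2rad(angle_deg))
    b = np.sin(np.deg2rad(angle_deg))
    cx, cy = center
    closed_form = np.array(
        [
            [a, b, cx - cx * a - b * cy],
            [-b, a, cy + cx * b - a * cy],
        ]
    )
    R = np.array([[a, b], [-b, a]])
    c = np.array(center, dtype=np.float64)
    # rotation about a center = translate center to origin, rotate, translate back
    composed = np.hstack([R, (c - R @ c).reshape(2, 1)])
    assert np.allclose(closed_form, composed)
    return closed_form

print(rotate_matrix(90.0, center=(3.0, 2.0)))
```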
dtypes=[torch.float32] ) for image_loader, padding in itertools.product( @@ -1144,7 +516,7 @@ def sample_inputs_pad_bounding_box(): def sample_inputs_pad_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): + for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_categories=[10], num_objects=[5]): yield ArgsKwargs(mask_loader, padding=[1]) @@ -1156,7 +528,7 @@ def reference_inputs_pad_mask(): def sample_inputs_pad_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, padding=[1]) @@ -1252,7 +624,7 @@ def pad_xfail_jit_fill_condition(args_kwargs): def sample_inputs_perspective_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"]): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): yield ArgsKwargs( image_loader, startpoints=None, endpoints=None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0] @@ -1304,7 +676,7 @@ def sample_inputs_perspective_bounding_box(): def sample_inputs_perspective_mask(): - for mask_loader in make_mask_loaders(sizes=["random"]): + for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): yield ArgsKwargs(mask_loader, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0]) yield ArgsKwargs(make_detection_mask_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS) @@ -1318,7 +690,7 @@ def reference_inputs_perspective_mask(): def sample_inputs_perspective_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0]) yield ArgsKwargs(make_video_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS) @@ -1377,7 +749,7 @@ def _get_elastic_displacement(spatial_size): def sample_inputs_elastic_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"]): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): displacement = _get_elastic_displacement(image_loader.spatial_size) for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): yield ArgsKwargs(image_loader, displacement=displacement, fill=fill) @@ -1409,13 +781,13 @@ def sample_inputs_elastic_bounding_box(): def sample_inputs_elastic_mask(): - for mask_loader in make_mask_loaders(sizes=["random"]): + for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): displacement = _get_elastic_displacement(mask_loader.shape[-2:]) yield ArgsKwargs(mask_loader, displacement=displacement) def sample_inputs_elastic_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): displacement = _get_elastic_displacement(video_loader.shape[-2:]) yield ArgsKwargs(video_loader, displacement=displacement) @@ -1486,7 +858,7 @@ def sample_inputs_center_crop_bounding_box(): def sample_inputs_center_crop_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): + for mask_loader in 
make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_categories=[10], num_objects=[5]): height, width = mask_loader.shape[-2:] yield ArgsKwargs(mask_loader, output_size=(height // 2, width // 2)) @@ -1499,7 +871,7 @@ def reference_inputs_center_crop_mask(): def sample_inputs_center_crop_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): height, width = video_loader.shape[-2:] yield ArgsKwargs(video_loader, output_size=(height // 2, width // 2)) @@ -1579,7 +951,7 @@ def sample_inputs_gaussian_blur_video(): def sample_inputs_equalize_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader) @@ -1640,7 +1012,7 @@ def make_beta_distributed_image(shape, dtype, device, *, alpha, beta, memory_for def sample_inputs_equalize_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader) @@ -1663,7 +1035,7 @@ def sample_inputs_equalize_video(): def sample_inputs_invert_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader) @@ -1673,7 +1045,7 @@ def reference_inputs_invert_image_tensor(): def sample_inputs_invert_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader) @@ -1699,7 +1071,7 @@ def sample_inputs_invert_video(): def sample_inputs_posterize_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, bits=_POSTERIZE_BITS[0]) @@ -1712,7 +1084,7 @@ def reference_inputs_posterize_image_tensor(): def sample_inputs_posterize_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, bits=_POSTERIZE_BITS[0]) @@ -1742,7 +1114,7 @@ def _get_solarize_thresholds(dtype): def sample_inputs_solarize_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, threshold=next(_get_solarize_thresholds(image_loader.dtype))) @@ -1757,7 +1129,7 @@ def uint8_to_float32_threshold_adapter(other_args, kwargs): def sample_inputs_solarize_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, threshold=next(_get_solarize_thresholds(video_loader.dtype))) @@ -1781,7 +1153,7 @@ def sample_inputs_solarize_video(): def sample_inputs_autocontrast_image_tensor(): - for image_loader in 
make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader) @@ -1791,7 +1163,7 @@ def reference_inputs_autocontrast_image_tensor(): def sample_inputs_autocontrast_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader) @@ -1821,7 +1193,7 @@ def sample_inputs_autocontrast_video(): def sample_inputs_adjust_sharpness_image_tensor(): for image_loader in make_image_loaders( - sizes=["random", (2, 2)], + sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE, (2, 2)], color_spaces=("GRAY", "RGB"), ): yield ArgsKwargs(image_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0]) @@ -1836,7 +1208,7 @@ def reference_inputs_adjust_sharpness_image_tensor(): def sample_inputs_adjust_sharpness_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0]) @@ -1860,7 +1232,7 @@ def sample_inputs_adjust_sharpness_video(): def sample_inputs_erase_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"]): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): # FIXME: make the parameters more diverse h, w = 6, 7 v = torch.rand(image_loader.num_channels, h, w) @@ -1868,7 +1240,7 @@ def sample_inputs_erase_image_tensor(): def sample_inputs_erase_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): # FIXME: make the parameters more diverse h, w = 6, 7 v = torch.rand(video_loader.num_channels, h, w) @@ -1893,7 +1265,7 @@ def sample_inputs_erase_video(): def sample_inputs_adjust_brightness_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, brightness_factor=_ADJUST_BRIGHTNESS_FACTORS[0]) @@ -1906,7 +1278,7 @@ def reference_inputs_adjust_brightness_image_tensor(): def sample_inputs_adjust_brightness_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, brightness_factor=_ADJUST_BRIGHTNESS_FACTORS[0]) @@ -1933,7 +1305,7 @@ def sample_inputs_adjust_brightness_video(): def sample_inputs_adjust_contrast_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, contrast_factor=_ADJUST_CONTRAST_FACTORS[0]) @@ -1946,7 +1318,7 @@ def reference_inputs_adjust_contrast_image_tensor(): def sample_inputs_adjust_contrast_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, contrast_factor=_ADJUST_CONTRAST_FACTORS[0]) @@ -1985,7 +1357,7 @@ def 
sample_inputs_adjust_contrast_video(): def sample_inputs_adjust_gamma_image_tensor(): gamma, gain = _ADJUST_GAMMA_GAMMAS_GAINS[0] - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, gamma=gamma, gain=gain) @@ -1999,7 +1371,7 @@ def reference_inputs_adjust_gamma_image_tensor(): def sample_inputs_adjust_gamma_video(): gamma, gain = _ADJUST_GAMMA_GAMMAS_GAINS[0] - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, gamma=gamma, gain=gain) @@ -2029,7 +1401,7 @@ def sample_inputs_adjust_gamma_video(): def sample_inputs_adjust_hue_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, hue_factor=_ADJUST_HUE_FACTORS[0]) @@ -2042,7 +1414,7 @@ def reference_inputs_adjust_hue_image_tensor(): def sample_inputs_adjust_hue_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, hue_factor=_ADJUST_HUE_FACTORS[0]) @@ -2071,7 +1443,7 @@ def sample_inputs_adjust_hue_video(): def sample_inputs_adjust_saturation_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, saturation_factor=_ADJUST_SATURATION_FACTORS[0]) @@ -2084,7 +1456,7 @@ def reference_inputs_adjust_saturation_image_tensor(): def sample_inputs_adjust_saturation_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, saturation_factor=_ADJUST_SATURATION_FACTORS[0]) @@ -2244,7 +1616,7 @@ def wrapper(input_tensor, *other_args, **kwargs): def sample_inputs_normalize_image_tensor(): for image_loader, (mean, std) in itertools.product( - make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32]), + make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[torch.float32]), _NORMALIZE_MEANS_STDS, ): yield ArgsKwargs(image_loader, mean=mean, std=std) @@ -2269,7 +1641,7 @@ def reference_inputs_normalize_image_tensor(): def sample_inputs_normalize_video(): mean, std = _NORMALIZE_MEANS_STDS[0] for video_loader in make_video_loaders( - sizes=["random"], color_spaces=["RGB"], num_frames=["random"], dtypes=[torch.float32] + sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], num_frames=[3], dtypes=[torch.float32] ): yield ArgsKwargs(video_loader, mean=mean, std=std) @@ -2303,7 +1675,9 @@ def sample_inputs_convert_dtype_image_tensor(): # conversion cannot be performed safely continue - for image_loader in make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[input_dtype]): + for image_loader in make_image_loaders( + sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[input_dtype] + ): yield ArgsKwargs(image_loader, dtype=output_dtype) @@ 
-2368,7 +1742,7 @@ def reference_inputs_convert_dtype_image_tensor(): def sample_inputs_convert_dtype_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader) @@ -2413,7 +1787,7 @@ def sample_inputs_convert_dtype_video(): def sample_inputs_uniform_temporal_subsample_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=[4]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[4]): yield ArgsKwargs(video_loader, num_samples=2) @@ -2429,7 +1803,9 @@ def reference_uniform_temporal_subsample_video(x, num_samples): def reference_inputs_uniform_temporal_subsample_video(): - for video_loader in make_video_loaders(sizes=["random"], color_spaces=["RGB"], num_frames=[10]): + for video_loader in make_video_loaders( + sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], num_frames=[10] + ): for num_samples in range(1, video_loader.shape[-4] + 1): yield ArgsKwargs(video_loader, num_samples) diff --git a/torchvision/csrc/io/image/cpu/decode_png.cpp b/torchvision/csrc/io/image/cpu/decode_png.cpp index b1ceaf1badd..d27eafe45a7 100644 --- a/torchvision/csrc/io/image/cpu/decode_png.cpp +++ b/torchvision/csrc/io/image/cpu/decode_png.cpp @@ -49,6 +49,7 @@ torch::Tensor decode_png( png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); TORCH_CHECK(false, "Internal error."); } + TORCH_CHECK(datap_len >= 8, "Content is too small for png!") auto is_png = !png_sig_cmp(datap, 0, 8); TORCH_CHECK(is_png, "Content is not png!") diff --git a/torchvision/datapoints/_datapoint.py b/torchvision/datapoints/_datapoint.py index fe489d13ea0..0dabec58f25 100644 --- a/torchvision/datapoints/_datapoint.py +++ b/torchvision/datapoints/_datapoint.py @@ -1,7 +1,7 @@ from __future__ import annotations from types import ModuleType -from typing import Any, Callable, List, Mapping, Optional, Sequence, Tuple, Type, TypeVar, Union +from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Type, TypeVar, Union import PIL.Image import torch @@ -36,6 +36,7 @@ def wrap_like(cls: Type[D], other: D, tensor: torch.Tensor) -> D: _NO_WRAPPING_EXCEPTIONS = { torch.Tensor.clone: lambda cls, input, output: cls.wrap_like(input, output), torch.Tensor.to: lambda cls, input, output: cls.wrap_like(input, output), + torch.Tensor.detach: lambda cls, input, output: cls.wrap_like(input, output), # We don't need to wrap the output of `Tensor.requires_grad_`, since it is an inplace operation and thus # retains the type automatically torch.Tensor.requires_grad_: lambda cls, input, output: output, @@ -132,6 +133,15 @@ def dtype(self) -> _dtype: # type: ignore[override] with DisableTorchFunctionSubclass(): return super().dtype + def __deepcopy__(self: D, memo: Dict[int, Any]) -> D: + # We need to detach first, since a plain `Tensor.clone` will be part of the computation graph, which does + # *not* happen for `deepcopy(Tensor)`. A side-effect from detaching is that the `Tensor.requires_grad` + # attribute is cleared, so we need to refill it before we return. + # Note: We don't explicitly handle deep-copying of the metadata here. The only metadata we currently have is + # `BoundingBox.format` and `BoundingBox.spatial_size`, which are immutable and thus implicitly deep-copied by + # `BoundingBox.clone()`. 
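The `__deepcopy__` rationale above turns on an autograd detail: `Tensor.clone()` is differentiable and keeps the copy attached to the graph, whereas `deepcopy` of a plain tensor yields a detached leaf. A minimal demonstration with a plain tensor (torch only):

```python
import torch

t = torch.rand(3, requires_grad=True)
print(t.clone().grad_fn is not None)  # True: clone() records CloneBackward

# the detach-clone-requires_grad_ dance from the hunk above
copy = t.detach().clone().requires_grad_(t.requires_grad)
print(copy.grad_fn is None, copy.is_leaf, copy.requires_grad)  # True True True
```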
+ return self.detach().clone().requires_grad_(self.requires_grad) # type: ignore[return-value] + def horizontal_flip(self) -> Datapoint: return self diff --git a/torchvision/datasets/utils.py index 220c1ae79d5..b79b4ef4e61 100644 --- a/torchvision/datasets/utils.py +++ b/torchvision/datasets/utils.py @@ -57,7 +57,7 @@ def calculate_md5(fpath: str, chunk_size: int = 1024 * 1024) -> str: else: md5 = hashlib.md5() with open(fpath, "rb") as f: - for chunk in iter(lambda: f.read(chunk_size), b""): + while chunk := f.read(chunk_size): md5.update(chunk) return md5.hexdigest() diff --git a/torchvision/models/_api.py index 51db5c0b23e..0999bf7ba6b 100644 --- a/torchvision/models/_api.py +++ b/torchvision/models/_api.py @@ -1,3 +1,4 @@ +import fnmatch import importlib import inspect import sys @@ -6,7 +7,7 @@ from functools import partial from inspect import signature from types import ModuleType -from typing import Any, Callable, Dict, List, Mapping, Optional, Type, TypeVar, Union +from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Set, Type, TypeVar, Union from torch import nn @@ -122,7 +123,9 @@ def get_weight(name: str) -> WeightsEnum: base_module_name = ".".join(sys.modules[__name__].__name__.split(".")[:-1]) base_module = importlib.import_module(base_module_name) model_modules = [base_module] + [ - x[1] for x in inspect.getmembers(base_module, inspect.ismodule) if x[1].__file__.endswith("__init__.py") + x[1] + for x in inspect.getmembers(base_module, inspect.ismodule) + if x[1].__file__.endswith("__init__.py") # type: ignore[union-attr] ] weights_enum = None @@ -201,19 +204,43 @@ def wrapper(fn: Callable[..., M]) -> Callable[..., M]: return wrapper -def list_models(module: Optional[ModuleType] = None) -> List[str]: +def list_models( + module: Optional[ModuleType] = None, + include: Union[Iterable[str], str, None] = None, + exclude: Union[Iterable[str], str, None] = None, +) -> List[str]: """ Returns a list with the names of registered models. Args: module (ModuleType, optional): The module from which we want to extract the available models. + include (str or Iterable[str], optional): Filter(s) for including the models from the set of all models. + Filters are passed to `fnmatch <https://docs.python.org/3/library/fnmatch.html>`__ to match Unix shell-style + wildcards. In case of multiple filters, the result is the union of the individual filters. + exclude (str or Iterable[str], optional): Filter(s) applied after the ``include`` filters to remove models. + Filters are passed to `fnmatch <https://docs.python.org/3/library/fnmatch.html>`__ to match Unix shell-style + wildcards. In case of multiple filters, the result is the removal of all models that match any individual filter. Returns: models (list): A list with the names of available models.
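The `calculate_md5` hunk swaps the `iter(callable, sentinel)` idiom for an assignment expression; both loops read fixed-size chunks until `read()` returns the empty-bytes sentinel. A side-by-side sketch of the two equivalent spellings (the walrus form needs Python 3.8+):

```python
import hashlib

def md5_iter(path, chunk_size=1024 * 1024):
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        # iter() calls f.read(chunk_size) until it returns the sentinel b""
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest()

def md5_walrus(path, chunk_size=1024 * 1024):
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        # same termination condition: read() returns b"" (falsy) at EOF
        while chunk := f.read(chunk_size):
            md5.update(chunk)
    return md5.hexdigest()

# md5_iter(p) == md5_walrus(p) for any readable path p
```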
""" - models = [ + all_models = { k for k, v in BUILTIN_MODELS.items() if module is None or v.__module__.rsplit(".", 1)[0] == module.__name__ - ] + } + if include: + models: Set[str] = set() + if isinstance(include, str): + include = [include] + for include_filter in include: + models = models | set(fnmatch.filter(all_models, include_filter)) + else: + models = all_models + + if exclude: + if isinstance(exclude, str): + exclude = [exclude] + for exclude_filter in exclude: + models = models - set(fnmatch.filter(all_models, exclude_filter)) return sorted(models) diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py index aa520e14962..1041d4d149f 100644 --- a/torchvision/models/mobilenetv3.py +++ b/torchvision/models/mobilenetv3.py @@ -378,7 +378,7 @@ def mobilenet_v3_large( weights are used. progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True. - **kwargs: parameters passed to the ``torchvision.models.resnet.MobileNetV3`` + **kwargs: parameters passed to the ``torchvision.models.mobilenet.MobileNetV3`` base class. Please refer to the `source code `_ for more details about this class. @@ -409,7 +409,7 @@ def mobilenet_v3_small( weights are used. progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True. - **kwargs: parameters passed to the ``torchvision.models.resnet.MobileNetV3`` + **kwargs: parameters passed to the ``torchvision.models.mobilenet.MobileNetV3`` base class. Please refer to the `source code `_ for more details about this class. diff --git a/torchvision/models/shufflenetv2.py b/torchvision/models/shufflenetv2.py index 52b85244b3d..3f3322b7a88 100644 --- a/torchvision/models/shufflenetv2.py +++ b/torchvision/models/shufflenetv2.py @@ -35,7 +35,7 @@ def channel_shuffle(x: Tensor, groups: int) -> Tensor: x = torch.transpose(x, 1, 2).contiguous() # flatten - x = x.view(batchsize, -1, height, width) + x = x.view(batchsize, num_channels, height, width) return x diff --git a/torchvision/prototype/datasets/_builtin/README.md b/torchvision/prototype/datasets/_builtin/README.md index 05d61c6870e..3b33100eb81 100644 --- a/torchvision/prototype/datasets/_builtin/README.md +++ b/torchvision/prototype/datasets/_builtin/README.md @@ -91,7 +91,7 @@ import hashlib def sha256sum(path, chunk_size=1024 * 1024): checksum = hashlib.sha256() with open(path, "rb") as f: - for chunk in iter(lambda: f.read(chunk_size), b""): + while chunk := f.read(chunk_size): checksum.update(chunk) print(checksum.hexdigest()) ``` diff --git a/torchvision/prototype/datasets/utils/_resource.py b/torchvision/prototype/datasets/utils/_resource.py index af4ede38dc0..dadec014b52 100644 --- a/torchvision/prototype/datasets/utils/_resource.py +++ b/torchvision/prototype/datasets/utils/_resource.py @@ -136,7 +136,7 @@ def download(self, root: Union[str, pathlib.Path], *, skip_integrity_check: bool def _check_sha256(self, path: pathlib.Path, *, chunk_size: int = 1024 * 1024) -> None: hash = hashlib.sha256() with open(path, "rb") as file: - for chunk in iter(lambda: file.read(chunk_size), b""): + while chunk := file.read(chunk_size): hash.update(chunk) sha256 = hash.hexdigest() if sha256 != self.sha256: diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 2c2f1e19359..3e81005c6d6 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -1248,7 +1248,7 @@ def affine( # Looks like to_grayscale() is a stand-alone functional that is 
never called # from the transform classes. Perhaps it's still here for BC? I can't be -# bothered to dig. Anyway, this can be deprecated as we migrate to V2. +# bothered to dig. @torch.jit.unused def to_grayscale(img, num_output_channels=1): """Convert PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image. diff --git a/torchvision/transforms/transforms.py index d0290f93249..38fc417204c 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -199,21 +199,21 @@ def forward(self, image): class ToPILImage: -    """Convert a tensor or an ndarray to PIL Image - this does not scale values. +    """Convert a tensor or an ndarray to PIL Image. This transform does not support torchscript. Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape - H x W x C to a PIL Image while preserving the value range. + H x W x C to a PIL Image while adjusting the value range depending on the ``mode``. Args: mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). If ``mode`` is ``None`` (default) there are some assumptions made about the input data: + - If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``. - If the input has 3 channels, the ``mode`` is assumed to be ``RGB``. - If the input has 2 channels, the ``mode`` is assumed to be ``LA``. - - If the input has 1 channel, the ``mode`` is determined by the data type (i.e ``int``, ``float``, - ``short``). + - If the input has 1 channel, the ``mode`` is determined by the data type (i.e. ``int``, ``float``, ``short``). .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes """ diff --git a/torchvision/transforms/v2/functional/__init__.py index ffb34c87748..b4803f4f1b9 100644 --- a/torchvision/transforms/v2/functional/__init__.py +++ b/torchvision/transforms/v2/functional/__init__.py @@ -76,6 +76,7 @@ solarize_image_pil, solarize_image_tensor, solarize_video, + to_grayscale, ) from ._geometry import ( affine, @@ -168,4 +169,4 @@ from ._temporal import uniform_temporal_subsample, uniform_temporal_subsample_video from ._type_conversion import pil_to_tensor, to_image_pil, to_image_tensor, to_pil_image -from ._deprecated import get_image_size, to_grayscale, to_tensor # usort: skip +from ._deprecated import get_image_size, to_tensor # usort: skip diff --git a/torchvision/transforms/v2/functional/_color.py index 4ba7e5b36b3..13417e4a990 100644 --- a/torchvision/transforms/v2/functional/_color.py +++ b/torchvision/transforms/v2/functional/_color.py @@ -56,6 +56,11 @@ def rgb_to_grayscale( ) +# `to_grayscale` actually predates `rgb_to_grayscale` in v1, but only handles PIL images. Since `rgb_to_grayscale` is a +# superset in terms of functionality and has the same signature, we alias here to avoid disruption.
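The alias relies on `rgb_to_grayscale` being a strict superset of the old PIL-only helper. For orientation, a sketch of the luma weighting torchvision's grayscale kernels use (ITU-R BT.601 coefficients, the same convention as PIL's ``L`` mode; `grayscale_luma` is a hypothetical standalone version, assuming a float CHW tensor):

```python
import torch

def grayscale_luma(image: torch.Tensor) -> torch.Tensor:
    r, g, b = image.unbind(dim=-3)  # split the channel dim
    return (0.2989 * r + 0.587 * g + 0.114 * b).unsqueeze(-3)

img = torch.rand(3, 5, 5)
print(grayscale_luma(img).shape)  # torch.Size([1, 5, 5])
```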
+to_grayscale = rgb_to_grayscale + + def _blend(image1: torch.Tensor, image2: torch.Tensor, ratio: float) -> torch.Tensor: ratio = float(ratio) fp = image1.is_floating_point() diff --git a/torchvision/transforms/v2/functional/_deprecated.py index 954daa97c21..c9a0f647e60 100644 --- a/torchvision/transforms/v2/functional/_deprecated.py +++ b/torchvision/transforms/v2/functional/_deprecated.py @@ -1,27 +1,12 @@ import warnings from typing import Any, List, Union -import PIL.Image import torch from torchvision import datapoints from torchvision.transforms import functional as _F -@torch.jit.unused -def to_grayscale(inpt: PIL.Image.Image, num_output_channels: int = 1) -> PIL.Image.Image: - call = ", num_output_channels=3" if num_output_channels == 3 else "" - replacement = "convert_color_space(..., color_space=datapoints.ColorSpace.GRAY)" - if num_output_channels == 3: - replacement = f"convert_color_space({replacement}, color_space=datapoints.ColorSpace.RGB)" - warnings.warn( - f"The function `to_grayscale(...{call})` is deprecated in will be removed in a future release. " - f"Instead, please use `{replacement}`.", - ) - - return _F.to_grayscale(inpt, num_output_channels=num_output_channels) - - @torch.jit.unused def to_tensor(inpt: Any) -> torch.Tensor: warnings.warn( diff --git a/torchvision/transforms/v2/functional/_geometry.py index aab3be24e0b..e1dd2866bc5 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -43,7 +43,8 @@ def horizontal_flip_image_tensor(image: torch.Tensor) -> torch.Tensor: return image.flip(-1) -horizontal_flip_image_pil = _FP.hflip +def horizontal_flip_image_pil(image: PIL.Image.Image) -> PIL.Image.Image: + return _FP.hflip(image) def horizontal_flip_mask(mask: torch.Tensor) -> torch.Tensor: @@ -92,7 +93,8 @@ def vertical_flip_image_tensor(image: torch.Tensor) -> torch.Tensor: return image.flip(-2) -vertical_flip_image_pil = _FP.vflip +def vertical_flip_image_pil(image: PIL.Image.Image) -> PIL.Image.Image: + return _FP.vflip(image) def vertical_flip_mask(mask: torch.Tensor) -> torch.Tensor: @@ -919,7 +921,6 @@ def rotate_image_pil( if center is not None and expand: warnings.warn("The provided center argument has no effect on the result if expand is True") - center = None return _FP.rotate( image, angle, interpolation=pil_modes_mapping[interpolation], expand=expand, fill=fill, center=center ) @@ -936,7 +937,6 @@ def rotate_bounding_box( ) -> Tuple[torch.Tensor, Tuple[int, int]]: if center is not None and expand: warnings.warn("The provided center argument has no effect on the result if expand is True") - center = None return _affine_bounding_box_with_expand( bounding_box, diff --git a/torchvision/utils.py index 1418656a7f2..6ec19a0e0a1 100644 --- a/torchvision/utils.py +++ b/torchvision/utils.py @@ -304,7 +304,10 @@ def draw_segmentation_masks( return image out_dtype = torch.uint8 - colors = [torch.tensor(color, dtype=out_dtype) for color in _parse_colors(colors, num_objects=num_masks)] + colors = [ + torch.tensor(color, dtype=out_dtype, device=image.device) + for color in _parse_colors(colors, num_objects=num_masks) + ] img_to_draw = image.detach().clone() # TODO: There might be a way to vectorize this
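The final hunk builds each color tensor on `image.device`. A minimal repro of the mismatch the change avoids, as a sketch assuming a CUDA-enabled build (the exact error text varies by PyTorch version):

```python
import torch

image = torch.zeros(3, 4, 4, dtype=torch.uint8, device="cuda")
mask = torch.ones(4, 4, dtype=torch.bool, device="cuda")
color = torch.tensor([255, 0, 0], dtype=torch.uint8)  # CPU by default
try:
    image[:, mask] = color[:, None]  # CUDA image, CPU value
except RuntimeError as e:
    print(e)  # device-mismatch error

color = torch.tensor([255, 0, 0], dtype=torch.uint8, device=image.device)
image[:, mask] = color[:, None]  # works once both sides share a device
```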