diff --git a/.clang-format b/.clang-format index 6d0ab740db4..95d60445f4a 100644 --- a/.clang-format +++ b/.clang-format @@ -60,9 +60,6 @@ MacroBlockBegin: '' MacroBlockEnd: '' MaxEmptyLinesToKeep: 1 NamespaceIndentation: None -ObjCBlockIndentWidth: 2 -ObjCSpaceAfterProperty: false -ObjCSpaceBeforeProtocolList: false PenaltyBreakBeforeFirstCallParameter: 1 PenaltyBreakComment: 300 PenaltyBreakFirstLessLess: 120 @@ -85,4 +82,11 @@ SpacesInSquareBrackets: false Standard: Cpp11 TabWidth: 8 UseTab: Never +--- +Language: ObjC +ColumnLimit: 120 +AlignAfterOpenBracket: Align +ObjCBlockIndentWidth: 2 +ObjCSpaceAfterProperty: false +ObjCSpaceBeforeProtocolList: false ... diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index eec93854788..b9754e29b1c 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -7,3 +7,5 @@ d367a01a18a3ae6bee13d8be3b63fd6a581ea46f # Upgrade usort to 1.0.2 and black to 22.3.0 (#5106) 6ca9c76adb6daf2695d603ad623a9cf1c4f4806f +# Fix unnecessary exploded black formatting (#7709) +a335d916db0694770e8152f41e19195de3134523 diff --git a/.github/scripts/setup-env.sh b/.github/scripts/setup-env.sh index d102735909e..e4af4e7c61a 100755 --- a/.github/scripts/setup-env.sh +++ b/.github/scripts/setup-env.sh @@ -54,7 +54,11 @@ echo '::endgroup::' if [[ "${OS_TYPE}" == windows && "${GPU_ARCH_TYPE}" == cuda ]]; then echo '::group::Install VisualStudio CUDA extensions on Windows' - TARGET_DIR="/c/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/MSBuild/Microsoft/VC/v160/BuildCustomizations" + if [[ "${VC_YEAR:-}" == "2022" ]]; then + TARGET_DIR="/c/Program Files (x86)/Microsoft Visual Studio/2022/BuildTools/MSBuild/Microsoft/VC/v170/BuildCustomizations" + else + TARGET_DIR="/c/Program Files (x86)/Microsoft Visual Studio/2019/BuildTools/MSBuild/Microsoft/VC/v160/BuildCustomizations" + fi mkdir -p "${TARGET_DIR}" cp -r "${CUDA_HOME}/MSBuildExtensions/"* "${TARGET_DIR}" echo '::endgroup::' diff --git a/.github/workflows/build-cmake.yml b/.github/workflows/build-cmake.yml index 06bd4de753e..3871dca340f 100644 --- a/.github/workflows/build-cmake.yml +++ b/.github/workflows/build-cmake.yml @@ -74,9 +74,9 @@ jobs: script: | set -euo pipefail - source packaging/windows/internal/vc_install_helper.sh - export PYTHON_VERSION=3.8 + export VC_YEAR=2022 + export VSDEVCMD_ARGS="" export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }} export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml index cd6011b4ad4..22e1a4ac18d 100644 --- a/.github/workflows/tests.yml +++ b/.github/workflows/tests.yml @@ -98,6 +98,8 @@ jobs: set -euxo pipefail export PYTHON_VERSION=${{ matrix.python-version }} + export VC_YEAR=2019 + export VSDEVCMD_ARGS="" export GPU_ARCH_TYPE=${{ matrix.gpu-arch-type }} export GPU_ARCH_VERSION=${{ matrix.gpu-arch-version }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 343df7f1021..762ebf6fce0 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -6,6 +6,8 @@ repos: - id: check-toml - id: check-yaml exclude: packaging/.* + args: + - --allow-multiple-documents - id: mixed-line-ending args: [--fix=lf] - id: end-of-file-fixer diff --git a/docs/source/conf.py b/docs/source/conf.py index 4bb75fe6eeb..7b3e9e8a7f3 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -140,7 +140,7 @@ "logo_only": True, "pytorch_project": "docs", "navigation_with_keys": True, - "analytics_id": "UA-117752657-2", + "analytics_id": "GTM-T8XT4PS", } html_logo = 
"_static/img/pytorch-logo-dark.svg" diff --git a/packaging/wheel/relocate.py b/packaging/wheel/relocate.py index e6a4ef9d458..6a8d35f1a6e 100644 --- a/packaging/wheel/relocate.py +++ b/packaging/wheel/relocate.py @@ -2,7 +2,6 @@ import glob import hashlib -import io # Standard library imports import os @@ -65,21 +64,12 @@ PYTHON_VERSION = sys.version_info -def read_chunks(file, size=io.DEFAULT_BUFFER_SIZE): - """Yield pieces of data from a file-like object until EOF.""" - while True: - chunk = file.read(size) - if not chunk: - break - yield chunk - - def rehash(path, blocksize=1 << 20): """Return (hash, length) for path using hashlib.sha256()""" h = hashlib.sha256() length = 0 with open(path, "rb") as f: - for block in read_chunks(f, size=blocksize): + while block := f.read(blocksize): length += len(block) h.update(block) digest = "sha256=" + urlsafe_b64encode(h.digest()).decode("latin1").rstrip("=") diff --git a/packaging/windows/internal/vc_env_helper.bat b/packaging/windows/internal/vc_env_helper.bat index e85a372f93d..d3484a66e9f 100644 --- a/packaging/windows/internal/vc_env_helper.bat +++ b/packaging/windows/internal/vc_env_helper.bat @@ -1,7 +1,11 @@ @echo on -set VC_VERSION_LOWER=16 -set VC_VERSION_UPPER=17 +set VC_VERSION_LOWER=17 +set VC_VERSION_UPPER=18 +if "%VC_YEAR%" == "2019" ( + set VC_VERSION_LOWER=16 + set VC_VERSION_UPPER=17 +) if "%VC_YEAR%" == "2017" ( set VC_VERSION_LOWER=15 set VC_VERSION_UPPER=16 diff --git a/packaging/windows/internal/vc_install_helper.sh b/packaging/windows/internal/vc_install_helper.sh deleted file mode 100644 index 251509ae194..00000000000 --- a/packaging/windows/internal/vc_install_helper.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/bash - -set -ex - -export VC_YEAR=2019 -export VSDEVCMD_ARGS="" diff --git a/test/assets/toosmall_png/heapbof.png b/test/assets/toosmall_png/heapbof.png new file mode 100644 index 00000000000..e720d183342 Binary files /dev/null and b/test/assets/toosmall_png/heapbof.png differ diff --git a/test/common_utils.py b/test/common_utils.py index 1d0b82a827c..72ecf104301 100644 --- a/test/common_utils.py +++ b/test/common_utils.py @@ -7,9 +7,11 @@ import os import pathlib import random +import re import shutil import sys import tempfile +import warnings from collections import defaultdict from subprocess import CalledProcessError, check_output, STDOUT from typing import Callable, Sequence, Tuple, Union @@ -25,7 +27,7 @@ from torch.testing._comparison import BooleanPair, NonePair, not_close_error_metas, NumberPair, TensorLikePair from torchvision import datapoints, io from torchvision.transforms._functional_tensor import _max_value as get_max_value -from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_tensor +from torchvision.transforms.v2.functional import convert_dtype_image_tensor, to_image_pil, to_image_tensor IN_OSS_CI = any(os.getenv(var) == "true" for var in ["CIRCLECI", "GITHUB_ACTIONS"]) @@ -122,7 +124,7 @@ def disable_console_output(): yield -def cpu_and_gpu(): +def cpu_and_cuda(): import pytest # noqa return ("cpu", pytest.param("cuda", marks=pytest.mark.needs_cuda)) @@ -397,6 +399,9 @@ def load(self, device="cpu"): ) +# new v2 default +DEFAULT_SIZE = (17, 11) +# old v2 defaults DEFAULT_SQUARE_SPATIAL_SIZE = 15 DEFAULT_LANDSCAPE_SPATIAL_SIZE = (7, 33) DEFAULT_PORTRAIT_SPATIAL_SIZE = (31, 9) @@ -404,13 +409,12 @@ def load(self, device="cpu"): DEFAULT_LANDSCAPE_SPATIAL_SIZE, DEFAULT_PORTRAIT_SPATIAL_SIZE, DEFAULT_SQUARE_SPATIAL_SIZE, - "random", ) def _parse_spatial_size(size, *, 
name="size"): if size == "random": - return tuple(torch.randint(15, 33, (2,)).tolist()) + raise ValueError("This should never happen") elif isinstance(size, int) and size > 0: return (size, size) elif ( @@ -490,8 +494,40 @@ def get_num_channels(color_space): return num_channels +def make_image( + size=DEFAULT_SIZE, + *, + color_space="RGB", + batch_dims=(), + dtype=None, + device="cpu", + memory_format=torch.contiguous_format, +): + max_value = get_max_value(dtype) + data = torch.testing.make_tensor( + (*batch_dims, get_num_channels(color_space), *size), + low=0, + high=max_value, + dtype=dtype or torch.uint8, + device=device, + memory_format=memory_format, + ) + if color_space in {"GRAY_ALPHA", "RGBA"}: + data[..., -1, :, :] = max_value + + return datapoints.Image(data) + + +def make_image_tensor(*args, **kwargs): + return make_image(*args, **kwargs).as_subclass(torch.Tensor) + + +def make_image_pil(*args, **kwargs): + return to_image_pil(make_image(*args, **kwargs)) + + def make_image_loader( - size="random", + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, color_space="RGB", extra_dims=(), @@ -499,24 +535,25 @@ def make_image_loader( constant_alpha=True, memory_format=torch.contiguous_format, ): + if not constant_alpha: + raise ValueError("This should never happen") size = _parse_spatial_size(size) num_channels = get_num_channels(color_space) def fn(shape, dtype, device, memory_format): - max_value = get_max_value(dtype) - data = torch.testing.make_tensor( - shape, low=0, high=max_value, dtype=dtype, device=device, memory_format=memory_format + *batch_dims, _, height, width = shape + return make_image( + (height, width), + color_space=color_space, + batch_dims=batch_dims, + dtype=dtype, + device=device, + memory_format=memory_format, ) - if color_space in {"GRAY_ALPHA", "RGBA"} and constant_alpha: - data[..., -1, :, :] = max_value - return datapoints.Image(data) return ImageLoader(fn, shape=(*extra_dims, num_channels, *size), dtype=dtype, memory_format=memory_format) -make_image = from_loader(make_image_loader) - - def make_image_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, @@ -538,7 +575,7 @@ def make_image_loaders( def make_image_loader_for_interpolation( - size="random", *, color_space="RGB", dtype=torch.uint8, memory_format=torch.contiguous_format + size=(233, 147), *, color_space="RGB", dtype=torch.uint8, memory_format=torch.contiguous_format ): size = _parse_spatial_size(size) num_channels = get_num_channels(color_space) @@ -587,76 +624,114 @@ class BoundingBoxLoader(TensorLoader): spatial_size: Tuple[int, int] -def randint_with_tensor_bounds(arg1, arg2=None, **kwargs): - low, high = torch.broadcast_tensors( - *[torch.as_tensor(arg) for arg in ((0, arg1) if arg2 is None else (arg1, arg2))] +def make_bounding_box( + size=None, + *, + format=datapoints.BoundingBoxFormat.XYXY, + spatial_size=None, + batch_dims=(), + dtype=None, + device="cpu", +): + """ + size: Size of the actual bounding box, i.e. + - (box[3] - box[1], box[2] - box[0]) for XYXY + - (H, W) for XYWH and CXCYWH + spatial_size: Size of the reference object, e.g. an image. Corresponds to the .spatial_size attribute on + returned datapoints.BoundingBox + + To generate a valid joint sample, you need to set spatial_size here to the same value as size on the other maker + functions, e.g. + + .. 
code:: + + image = make_image(size=size) + bounding_box = make_bounding_box(spatial_size=size) + assert F.get_spatial_size(bounding_box) == F.get_spatial_size(image) + + For convenience, if both size and spatial_size are omitted, spatial_size defaults to the same value as size for all + other maker functions, e.g. + + .. code:: + + image = make_image() + bounding_box = make_bounding_box() + assert F.get_spatial_size(bounding_box) == F.get_spatial_size(image) + """ + + def sample_position(values, max_value): + # We cannot use torch.randint directly here, because it only allows integer scalars as values for low and high. + # However, if we have batch_dims, we need tensors as limits. + return torch.stack([torch.randint(max_value - v, ()) for v in values.flatten().tolist()]).reshape(values.shape) + + if isinstance(format, str): + format = datapoints.BoundingBoxFormat[format] + + if spatial_size is None: + if size is None: + spatial_size = DEFAULT_SIZE + else: + height, width = size + height_margin, width_margin = torch.randint(10, (2,)).tolist() + spatial_size = (height + height_margin, width + width_margin) + + dtype = dtype or torch.float32 + + if any(dim == 0 for dim in batch_dims): + return datapoints.BoundingBox( + torch.empty(*batch_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size + ) + + if size is None: + h, w = [torch.randint(1, s, batch_dims) for s in spatial_size] + else: + h, w = [torch.full(batch_dims, s, dtype=torch.int) for s in size] + + y = sample_position(h, spatial_size[0]) + x = sample_position(w, spatial_size[1]) + + if format is datapoints.BoundingBoxFormat.XYWH: + parts = (x, y, w, h) + elif format is datapoints.BoundingBoxFormat.XYXY: + x1, y1 = x, y + x2 = x1 + w + y2 = y1 + h + parts = (x1, y1, x2, y2) + elif format is datapoints.BoundingBoxFormat.CXCYWH: + cx = x + w / 2 + cy = y + h / 2 + parts = (cx, cy, w, h) + else: + raise ValueError(f"Format {format} is not supported") + + return datapoints.BoundingBox( + torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size ) - return torch.stack( - [ - torch.randint(low_scalar, high_scalar, (), **kwargs) - for low_scalar, high_scalar in zip(low.flatten().tolist(), high.flatten().tolist()) - ] - ).reshape(low.shape) -def make_bounding_box_loader(*, extra_dims=(), format, spatial_size="random", dtype=torch.float32): +def make_bounding_box_loader(*, extra_dims=(), format, spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtype=torch.float32): if isinstance(format, str): format = datapoints.BoundingBoxFormat[format] - if format not in { - datapoints.BoundingBoxFormat.XYXY, - datapoints.BoundingBoxFormat.XYWH, - datapoints.BoundingBoxFormat.CXCYWH, - }: - raise pytest.UsageError(f"Can't make bounding box in format {format}") spatial_size = _parse_spatial_size(spatial_size, name="spatial_size") def fn(shape, dtype, device): - *extra_dims, num_coordinates = shape + *batch_dims, num_coordinates = shape if num_coordinates != 4: raise pytest.UsageError() - if any(dim == 0 for dim in extra_dims): - return datapoints.BoundingBox( - torch.empty(*extra_dims, 4, dtype=dtype, device=device), format=format, spatial_size=spatial_size - ) - - height, width = spatial_size - - if format == datapoints.BoundingBoxFormat.XYXY: - x1 = torch.randint(0, width // 2, extra_dims) - y1 = torch.randint(0, height // 2, extra_dims) - x2 = randint_with_tensor_bounds(x1 + 1, width - x1) + x1 - y2 = randint_with_tensor_bounds(y1 + 1, height - y1) + y1 - parts = (x1, y1, x2, y2) - elif format 
== datapoints.BoundingBoxFormat.XYWH: - x = torch.randint(0, width // 2, extra_dims) - y = torch.randint(0, height // 2, extra_dims) - w = randint_with_tensor_bounds(1, width - x) - h = randint_with_tensor_bounds(1, height - y) - parts = (x, y, w, h) - else: # format == features.BoundingBoxFormat.CXCYWH: - cx = torch.randint(1, width - 1, extra_dims) - cy = torch.randint(1, height - 1, extra_dims) - w = randint_with_tensor_bounds(1, torch.minimum(cx, width - cx) + 1) - h = randint_with_tensor_bounds(1, torch.minimum(cy, height - cy) + 1) - parts = (cx, cy, w, h) - - return datapoints.BoundingBox( - torch.stack(parts, dim=-1).to(dtype=dtype, device=device), format=format, spatial_size=spatial_size + return make_bounding_box( + format=format, spatial_size=spatial_size, batch_dims=batch_dims, dtype=dtype, device=device ) return BoundingBoxLoader(fn, shape=(*extra_dims, 4), dtype=dtype, format=format, spatial_size=spatial_size) -make_bounding_box = from_loader(make_bounding_box_loader) - - def make_bounding_box_loaders( *, extra_dims=DEFAULT_EXTRA_DIMS, formats=tuple(datapoints.BoundingBoxFormat), - spatial_size="random", + spatial_size=DEFAULT_PORTRAIT_SPATIAL_SIZE, dtypes=(torch.float32, torch.float64, torch.int64), ): for params in combinations_grid(extra_dims=extra_dims, format=formats, dtype=dtypes): @@ -670,24 +745,35 @@ class MaskLoader(TensorLoader): pass -def make_detection_mask_loader(size="random", *, num_objects="random", extra_dims=(), dtype=torch.uint8): +def make_detection_mask(size=DEFAULT_SIZE, *, num_objects=5, batch_dims=(), dtype=None, device="cpu"): + """Make a "detection" mask, i.e. (*, N, H, W), where each object is encoded as one of N boolean masks""" + return datapoints.Mask( + torch.testing.make_tensor( + (*batch_dims, num_objects, *size), + low=0, + high=2, + dtype=dtype or torch.bool, + device=device, + ) + ) + + +def make_detection_mask_loader(size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_objects=5, extra_dims=(), dtype=torch.uint8): # This produces "detection" masks, i.e. `(*, N, H, W)`, where `N` denotes the number of objects size = _parse_spatial_size(size) - num_objects = int(torch.randint(1, 11, ())) if num_objects == "random" else num_objects def fn(shape, dtype, device): - data = torch.testing.make_tensor(shape, low=0, high=2, dtype=dtype, device=device) - return datapoints.Mask(data) + *batch_dims, num_objects, height, width = shape + return make_detection_mask( + (height, width), num_objects=num_objects, batch_dims=batch_dims, dtype=dtype, device=device + ) return MaskLoader(fn, shape=(*extra_dims, num_objects, *size), dtype=dtype) -make_detection_mask = from_loader(make_detection_mask_loader) - - def make_detection_mask_loaders( sizes=DEFAULT_SPATIAL_SIZES, - num_objects=(1, 0, "random"), + num_objects=(1, 0, 5), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8,), ): @@ -698,25 +784,38 @@ def make_detection_mask_loaders( make_detection_masks = from_loaders(make_detection_mask_loaders) -def make_segmentation_mask_loader(size="random", *, num_categories="random", extra_dims=(), dtype=torch.uint8): - # This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values - size = _parse_spatial_size(size) - num_categories = int(torch.randint(1, 11, ())) if num_categories == "random" else num_categories +def make_segmentation_mask(size=DEFAULT_SIZE, *, num_categories=10, batch_dims=(), dtype=None, device="cpu"): + """Make a "segmentation" mask, i.e. 
(*, H, W), where the category is encoded as the pixel value""" + return datapoints.Mask( + torch.testing.make_tensor( + (*batch_dims, *size), + low=0, + high=num_categories, + dtype=dtype or torch.uint8, + device=device, + ) + ) - def fn(shape, dtype, device): - data = torch.testing.make_tensor(shape, low=0, high=num_categories, dtype=dtype, device=device) - return datapoints.Mask(data) - return MaskLoader(fn, shape=(*extra_dims, *size), dtype=dtype) +def make_segmentation_mask_loader( + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, num_categories=10, extra_dims=(), dtype=torch.uint8 +): + # This produces "segmentation" masks, i.e. `(*, H, W)`, where the category is encoded in the values + spatial_size = _parse_spatial_size(size) + def fn(shape, dtype, device): + *batch_dims, height, width = shape + return make_segmentation_mask( + (height, width), num_categories=num_categories, batch_dims=batch_dims, dtype=dtype, device=device + ) -make_segmentation_mask = from_loader(make_segmentation_mask_loader) + return MaskLoader(fn, shape=(*extra_dims, *spatial_size), dtype=dtype) def make_segmentation_mask_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, - num_categories=(1, 2, "random"), + num_categories=(1, 2, 10), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8,), ): @@ -730,8 +829,8 @@ def make_segmentation_mask_loaders( def make_mask_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, - num_objects=(1, 0, "random"), - num_categories=(1, 2, "random"), + num_objects=(1, 0, 5), + num_categories=(1, 2, 10), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8,), ): @@ -748,29 +847,35 @@ class VideoLoader(ImageLoader): pass +def make_video(size=DEFAULT_SIZE, *, num_frames=3, batch_dims=(), **kwargs): + return datapoints.Video(make_image(size, batch_dims=(*batch_dims, num_frames), **kwargs)) + + def make_video_loader( - size="random", + size=DEFAULT_PORTRAIT_SPATIAL_SIZE, *, color_space="RGB", - num_frames="random", + num_frames=3, extra_dims=(), dtype=torch.uint8, ): size = _parse_spatial_size(size) - num_frames = int(torch.randint(1, 5, ())) if num_frames == "random" else num_frames def fn(shape, dtype, device, memory_format): - video = make_image( - size=shape[-2:], extra_dims=shape[:-3], dtype=dtype, device=device, memory_format=memory_format + *batch_dims, num_frames, _, height, width = shape + return make_video( + (height, width), + num_frames=num_frames, + batch_dims=batch_dims, + color_space=color_space, + dtype=dtype, + device=device, + memory_format=memory_format, ) - return datapoints.Video(video) return VideoLoader(fn, shape=(*extra_dims, num_frames, get_num_channels(color_space), *size), dtype=dtype) -make_video = from_loader(make_video_loader) - - def make_video_loaders( *, sizes=DEFAULT_SPATIAL_SIZES, @@ -778,7 +883,7 @@ def make_video_loaders( "GRAY", "RGB", ), - num_frames=(1, 0, "random"), + num_frames=(1, 0, 3), extra_dims=DEFAULT_EXTRA_DIMS, dtypes=(torch.uint8, torch.float32, torch.float64), ): @@ -880,3 +985,23 @@ def assert_run_python_script(source_code): raise RuntimeError(f"script errored with output:\n{e.output.decode()}") if out != b"": raise AssertionError(out.decode()) + + +@contextlib.contextmanager +def assert_no_warnings(): + # The name `catch_warnings` is a misnomer as the context manager does **not** catch any warnings, but rather scopes + # the warning filters. All changes that are made to the filters while in this context will be reset upon exit. 
+ with warnings.catch_warnings(): + warnings.simplefilter("error") + yield + + +@contextlib.contextmanager +def ignore_jit_no_profile_information_warning(): + # Calling a scripted object often triggers a warning like + # `UserWarning: operator() profile_node %$INT1 : int[] = prim::profile_ivalue($INT2) does not have profile information` + # with varying `INT1` and `INT2`. Since these are uninteresting for us and only clutter the test summary, we ignore + # them. + with warnings.catch_warnings(): + warnings.filterwarnings("ignore", message=re.escape("operator() profile_node %"), category=UserWarning) + yield diff --git a/test/conftest.py b/test/conftest.py index a9e8f1cda52..468587f1c9e 100644 --- a/test/conftest.py +++ b/test/conftest.py @@ -33,7 +33,7 @@ def pytest_collection_modifyitems(items): # The needs_cuda mark will exist if the test was explicitly decorated with # the @needs_cuda decorator. It will also exist if it was parametrized with a # parameter that has the mark: for example if a test is parametrized with - # @pytest.mark.parametrize('device', cpu_and_gpu()) + # @pytest.mark.parametrize('device', cpu_and_cuda()) # the "instances" of the tests where device == 'cuda' will have the 'needs_cuda' mark, # and the ones with device == 'cpu' won't have the mark. needs_cuda = item.get_closest_marker("needs_cuda") is not None diff --git a/test/test_datapoints.py b/test/test_datapoints.py index 39c05123333..1334fd7283b 100644 --- a/test/test_datapoints.py +++ b/test/test_datapoints.py @@ -1,5 +1,8 @@ +from copy import deepcopy + import pytest import torch +from common_utils import assert_equal from PIL import Image from torchvision import datapoints @@ -30,3 +33,154 @@ def test_bbox_instance(data, format): if isinstance(format, str): format = datapoints.BoundingBoxFormat[(format.upper())] assert bboxes.format == format + + +@pytest.mark.parametrize( + ("data", "input_requires_grad", "expected_requires_grad"), + [ + ([[[0.0, 1.0], [0.0, 1.0]]], None, False), + ([[[0.0, 1.0], [0.0, 1.0]]], False, False), + ([[[0.0, 1.0], [0.0, 1.0]]], True, True), + (torch.rand(3, 16, 16, requires_grad=False), None, False), + (torch.rand(3, 16, 16, requires_grad=False), False, False), + (torch.rand(3, 16, 16, requires_grad=False), True, True), + (torch.rand(3, 16, 16, requires_grad=True), None, True), + (torch.rand(3, 16, 16, requires_grad=True), False, False), + (torch.rand(3, 16, 16, requires_grad=True), True, True), + ], +) +def test_new_requires_grad(data, input_requires_grad, expected_requires_grad): + datapoint = datapoints.Image(data, requires_grad=input_requires_grad) + assert datapoint.requires_grad is expected_requires_grad + + +def test_isinstance(): + assert isinstance(datapoints.Image(torch.rand(3, 16, 16)), torch.Tensor) + + +def test_wrapping_no_copy(): + tensor = torch.rand(3, 16, 16) + image = datapoints.Image(tensor) + + assert image.data_ptr() == tensor.data_ptr() + + +def test_to_wrapping(): + image = datapoints.Image(torch.rand(3, 16, 16)) + + image_to = image.to(torch.float64) + + assert type(image_to) is datapoints.Image + assert image_to.dtype is torch.float64 + + +def test_to_datapoint_reference(): + tensor = torch.rand((3, 16, 16), dtype=torch.float64) + image = datapoints.Image(tensor) + + tensor_to = tensor.to(image) + + assert type(tensor_to) is torch.Tensor + assert tensor_to.dtype is torch.float64 + + +def test_clone_wrapping(): + image = datapoints.Image(torch.rand(3, 16, 16)) + + image_clone = image.clone() + + assert type(image_clone) is datapoints.Image + assert 
image_clone.data_ptr() != image.data_ptr() + + +def test_requires_grad__wrapping(): + image = datapoints.Image(torch.rand(3, 16, 16)) + + assert not image.requires_grad + + image_requires_grad = image.requires_grad_(True) + + assert type(image_requires_grad) is datapoints.Image + assert image.requires_grad + assert image_requires_grad.requires_grad + + +def test_detach_wrapping(): + image = datapoints.Image(torch.rand(3, 16, 16), requires_grad=True) + + image_detached = image.detach() + + assert type(image_detached) is datapoints.Image + + +def test_other_op_no_wrapping(): + image = datapoints.Image(torch.rand(3, 16, 16)) + + # any operation besides the ones listed in `Datapoint._NO_WRAPPING_EXCEPTIONS` will do here + output = image * 2 + + assert type(output) is torch.Tensor + + +@pytest.mark.parametrize( + "op", + [ + lambda t: t.numpy(), + lambda t: t.tolist(), + lambda t: t.max(dim=-1), + ], +) +def test_no_tensor_output_op_no_wrapping(op): + image = datapoints.Image(torch.rand(3, 16, 16)) + + output = op(image) + + assert type(output) is not datapoints.Image + + +def test_inplace_op_no_wrapping(): + image = datapoints.Image(torch.rand(3, 16, 16)) + + output = image.add_(0) + + assert type(output) is torch.Tensor + assert type(image) is datapoints.Image + + +def test_wrap_like(): + image = datapoints.Image(torch.rand(3, 16, 16)) + + # any operation besides the ones listed in `Datapoint._NO_WRAPPING_EXCEPTIONS` will do here + output = image * 2 + + image_new = datapoints.Image.wrap_like(image, output) + + assert type(image_new) is datapoints.Image + assert image_new.data_ptr() == output.data_ptr() + + +@pytest.mark.parametrize( + "datapoint", + [ + datapoints.Image(torch.rand(3, 16, 16)), + datapoints.Video(torch.rand(2, 3, 16, 16)), + datapoints.BoundingBox([0.0, 1.0, 2.0, 3.0], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10)), + datapoints.Mask(torch.randint(0, 256, (16, 16), dtype=torch.uint8)), + ], +) +@pytest.mark.parametrize("requires_grad", [False, True]) +def test_deepcopy(datapoint, requires_grad): + if requires_grad and not datapoint.dtype.is_floating_point: + return + + datapoint.requires_grad_(requires_grad) + + datapoint_deepcopied = deepcopy(datapoint) + + assert datapoint_deepcopied is not datapoint + assert datapoint_deepcopied.data_ptr() != datapoint.data_ptr() + assert_equal(datapoint_deepcopied, datapoint) + + assert type(datapoint_deepcopied) is type(datapoint) + assert datapoint_deepcopied.requires_grad is requires_grad + assert datapoint_deepcopied.is_leaf diff --git a/test/test_extended_models.py b/test/test_extended_models.py index 0866cc0f8a3..96a3fc5f8ed 100644 --- a/test/test_extended_models.py +++ b/test/test_extended_models.py @@ -103,17 +103,18 @@ def test_weights_deserializable(name): assert pickle.loads(pickle.dumps(weights)) is weights +def get_models_from_module(module): + return [ + v.__name__ + for k, v in module.__dict__.items() + if callable(v) and k[0].islower() and k[0] != "_" and k not in models._api.__all__ + ] + + @pytest.mark.parametrize( "module", [models, models.detection, models.quantization, models.segmentation, models.video, models.optical_flow] ) def test_list_models(module): - def get_models_from_module(module): - return [ - v.__name__ - for k, v in module.__dict__.items() - if callable(v) and k[0].islower() and k[0] != "_" and k not in models._api.__all__ - ] - a = set(get_models_from_module(module)) b = set(x.replace("quantized_", "") for x in models.list_models(module)) @@ -121,6 +122,65 @@ def 
get_models_from_module(module): assert a == b +@pytest.mark.parametrize( + "include_filters", + [ + None, + [], + (), + "", + "*resnet*", + ["*alexnet*"], + "*not-existing-model-for-test?", + ["*resnet*", "*alexnet*"], + ["*resnet*", "*alexnet*", "*not-existing-model-for-test?"], + ("*resnet*", "*alexnet*"), + set(["*resnet*", "*alexnet*"]), + ], +) +@pytest.mark.parametrize( + "exclude_filters", + [ + None, + [], + (), + "", + "*resnet*", + ["*alexnet*"], + ["*not-existing-model-for-test?"], + ["resnet34", "*not-existing-model-for-test?"], + ["resnet34", "*resnet1*"], + ("resnet34", "*resnet1*"), + set(["resnet34", "*resnet1*"]), + ], +) +def test_list_models_filters(include_filters, exclude_filters): + actual = set(models.list_models(models, include=include_filters, exclude=exclude_filters)) + classification_models = set(get_models_from_module(models)) + + if isinstance(include_filters, str): + include_filters = [include_filters] + if isinstance(exclude_filters, str): + exclude_filters = [exclude_filters] + + if include_filters: + expected = set() + for include_f in include_filters: + include_f = include_f.strip("*?") + expected = expected | set(x for x in classification_models if include_f in x) + else: + expected = classification_models + + if exclude_filters: + for exclude_f in exclude_filters: + exclude_f = exclude_f.strip("*?") + if exclude_f != "": + a_exclude = set(x for x in classification_models if exclude_f in x) + expected = expected - a_exclude + + assert expected == actual + + @pytest.mark.parametrize( "name, weight", [ diff --git a/test/test_functional_tensor.py b/test/test_functional_tensor.py index 0e1cc648a19..fb3f5744e54 100644 --- a/test/test_functional_tensor.py +++ b/test/test_functional_tensor.py @@ -21,7 +21,7 @@ _create_data_batch, _test_fn_on_batch, assert_equal, - cpu_and_gpu, + cpu_and_cuda, needs_cuda, ) from torchvision.transforms import InterpolationMode @@ -34,7 +34,7 @@ ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("fn", [F.get_image_size, F.get_image_num_channels, F.get_dimensions]) def test_image_sizes(device, fn): script_F = torch.jit.script(fn) @@ -72,7 +72,7 @@ class TestRotate: scripted_rotate = torch.jit.script(F.rotate) IMG_W = 26 - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("height, width", [(7, 33), (26, IMG_W), (32, IMG_W)]) @pytest.mark.parametrize( "center", @@ -131,7 +131,7 @@ def test_rotate(self, device, height, width, center, dt, angle, expand, fill, fn f"{out_pil_tensor[0, :7, :7]}" ) - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dt", ALL_DTYPES) def test_rotate_batch(self, device, dt): if dt == torch.float16 and device == "cpu": @@ -157,7 +157,7 @@ class TestAffine: ALL_DTYPES = [None, torch.float32, torch.float64, torch.float16] scripted_affine = torch.jit.script(F.affine) - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("height, width", [(26, 26), (32, 26)]) @pytest.mark.parametrize("dt", ALL_DTYPES) def test_identity_map(self, device, height, width, dt): @@ -180,7 +180,7 @@ def test_identity_map(self, device, height, width, dt): ) assert_equal(tensor, out_tensor, msg=f"{out_tensor[0, :5, :5]} vs {tensor[0, :5, :5]}") - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", 
cpu_and_cuda()) @pytest.mark.parametrize("height, width", [(26, 26)]) @pytest.mark.parametrize("dt", ALL_DTYPES) @pytest.mark.parametrize( @@ -224,7 +224,7 @@ def test_square_rotations(self, device, height, width, dt, angle, config, fn): # Tolerance : less than 6% of different pixels assert ratio_diff_pixels < 0.06 - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("height, width", [(32, 26)]) @pytest.mark.parametrize("dt", ALL_DTYPES) @pytest.mark.parametrize("angle", [90, 45, 15, -30, -60, -120]) @@ -258,7 +258,7 @@ def test_rect_rotations(self, device, height, width, dt, angle, fn, center): # Tolerance : less than 3% of different pixels assert ratio_diff_pixels < 0.03 - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("height, width", [(26, 26), (32, 26)]) @pytest.mark.parametrize("dt", ALL_DTYPES) @pytest.mark.parametrize("t", [[10, 12], (-12, -13)]) @@ -283,7 +283,7 @@ def test_translations(self, device, height, width, dt, t, fn): _assert_equal_tensor_to_pil(out_tensor, out_pil_img) - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("height, width", [(26, 26), (32, 26)]) @pytest.mark.parametrize("dt", ALL_DTYPES) @pytest.mark.parametrize( @@ -293,24 +293,8 @@ def test_translations(self, device, height, width, dt, t, fn): (33, (5, -4), 1.0, [0.0, 0.0], [0, 0, 0]), (45, [-5, 4], 1.2, [0.0, 0.0], (1, 2, 3)), (33, (-4, -8), 2.0, [0.0, 0.0], [255, 255, 255]), - ( - 85, - (10, -10), - 0.7, - [0.0, 0.0], - [ - 1, - ], - ), - ( - 0, - [0, 0], - 1.0, - [ - 35.0, - ], - (2.0,), - ), + (85, (10, -10), 0.7, [0.0, 0.0], [1]), + (0, [0, 0], 1.0, [35.0], (2.0,)), (-25, [0, 0], 1.2, [0.0, 15.0], None), (-45, [-10, 0], 0.7, [2.0, 5.0], None), (-45, [-10, -10], 1.2, [4.0, 5.0], None), @@ -344,7 +328,7 @@ def test_all_ops(self, device, height, width, dt, a, t, s, sh, f, fn): tol = 0.06 if device == "cuda" else 0.05 assert ratio_diff_pixels < tol - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dt", ALL_DTYPES) def test_batches(self, device, dt): if dt == torch.float16 and device == "cpu": @@ -357,7 +341,7 @@ def test_batches(self, device, dt): _test_fn_on_batch(batch_tensors, F.affine, angle=-43, translate=[-3, 4], scale=1.2, shear=[4.0, 5.0]) - @pytest.mark.parametrize("device", cpu_and_gpu()) + @pytest.mark.parametrize("device", cpu_and_cuda()) def test_interpolation_type(self, device): tensor, pil_img = _create_data(26, 26, device=device) @@ -389,22 +373,10 @@ def _get_data_dims_and_points_for_perspective(): return dims_and_points -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dims_and_points", _get_data_dims_and_points_for_perspective()) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) -@pytest.mark.parametrize( - "fill", - ( - None, - [0, 0, 0], - [1, 2, 3], - [255, 255, 255], - [ - 1, - ], - (2.0,), - ), -) +@pytest.mark.parametrize("fill", (None, [0, 0, 0], [1, 2, 3], [255, 255, 255], [1], (2.0,))) @pytest.mark.parametrize("fn", [F.perspective, torch.jit.script(F.perspective)]) def test_perspective_pil_vs_tensor(device, dims_and_points, dt, fill, fn): @@ -435,7 +407,7 @@ def test_perspective_pil_vs_tensor(device, dims_and_points, dt, fill, fn): assert ratio_diff_pixels 
< 0.05 -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dims_and_points", _get_data_dims_and_points_for_perspective()) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) def test_perspective_batch(device, dims_and_points, dt): @@ -473,21 +445,9 @@ def test_perspective_interpolation_type(): assert_equal(res1, res2) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) -@pytest.mark.parametrize( - "size", - [ - 32, - 26, - [ - 32, - ], - [32, 32], - (32, 32), - [26, 35], - ], -) +@pytest.mark.parametrize("size", [32, 26, [32], [32, 32], (32, 32), [26, 35]]) @pytest.mark.parametrize("max_size", [None, 34, 40, 1000]) @pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC, NEAREST, NEAREST_EXACT]) def test_resize(device, dt, size, max_size, interpolation): @@ -539,7 +499,7 @@ def test_resize(device, dt, size, max_size, interpolation): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_resize_asserts(device): tensor, pil_img = _create_data(26, 36, device=device) @@ -556,7 +516,7 @@ def test_resize_asserts(device): F.resize(img, size=32, max_size=32) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) @pytest.mark.parametrize("size", [[96, 72], [96, 420], [420, 72]]) @pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC]) @@ -609,21 +569,6 @@ def test_resize_antialias(device, dt, size, interpolation): assert_equal(resized_tensor, resize_result) -@needs_cuda -@pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC]) -def test_assert_resize_antialias(interpolation): - - # Checks implementation on very large scales - # and catch TORCH_CHECK inside PyTorch implementation - torch.manual_seed(12) - tensor, _ = _create_data(1000, 1000, device="cuda") - - # Error message is not yet updated in pytorch nightly - # with pytest.raises(RuntimeError, match=r"Provided interpolation parameters can not be handled"): - with pytest.raises(RuntimeError, match=r"Too much shared memory required"): - F.resize(tensor, size=(5, 5), interpolation=interpolation, antialias=True) - - def test_resize_antialias_default_warning(): img = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8) @@ -641,25 +586,6 @@ def test_resize_antialias_default_warning(): F.resized_crop(img, 0, 0, 10, 10, size=(20, 20), interpolation=NEAREST) -@pytest.mark.parametrize("device", cpu_and_gpu()) -@pytest.mark.parametrize("dt", [torch.float32, torch.float64, torch.float16]) -@pytest.mark.parametrize("size", [[10, 7], [10, 42], [42, 7]]) -@pytest.mark.parametrize("interpolation", [BILINEAR, BICUBIC]) -def test_interpolate_antialias_backward(device, dt, size, interpolation): - - if dt == torch.float16 and device == "cpu": - # skip float16 on CPU case - return - - torch.manual_seed(12) - x = (torch.rand(1, 32, 29, 3, dtype=torch.double, device=device).permute(0, 3, 1, 2).requires_grad_(True),) - resize = partial(F.resize, size=size, interpolation=interpolation, antialias=True) - assert torch.autograd.gradcheck(resize, x, eps=1e-8, atol=1e-6, rtol=1e-6, fast_mode=False) - - x = (torch.rand(1, 3, 32, 29, dtype=torch.double, device=device, requires_grad=True),) - assert 
torch.autograd.gradcheck(resize, x, eps=1e-8, atol=1e-6, rtol=1e-6, fast_mode=False) - - def check_functional_vs_PIL_vs_scripted( fn, fn_pil, fn_t, config, device, dtype, channels=3, tol=2.0 + 1e-10, agg_method="max" ): @@ -697,7 +623,7 @@ def check_functional_vs_PIL_vs_scripted( _test_fn_on_batch(batch_tensors, fn, scripted_fn_atol=atol, **config) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"brightness_factor": f} for f in (0.1, 0.5, 1.0, 1.34, 2.5)]) @pytest.mark.parametrize("channels", [1, 3]) @@ -713,7 +639,7 @@ def test_adjust_brightness(device, dtype, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("channels", [1, 3]) def test_invert(device, dtype, channels): @@ -722,7 +648,7 @@ def test_invert(device, dtype, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("config", [{"bits": bits} for bits in range(0, 8)]) @pytest.mark.parametrize("channels", [1, 3]) def test_posterize(device, config, channels): @@ -739,7 +665,7 @@ def test_posterize(device, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("config", [{"threshold": threshold} for threshold in [0, 64, 128, 192, 255]]) @pytest.mark.parametrize("channels", [1, 3]) def test_solarize1(device, config, channels): @@ -756,7 +682,7 @@ def test_solarize1(device, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"threshold": threshold} for threshold in [0.0, 0.25, 0.5, 0.75, 1.0]]) @pytest.mark.parametrize("channels", [1, 3]) @@ -788,7 +714,7 @@ def test_solarize2(device, dtype, config, channels): *[(torch.int64, threshold) for threshold in [0, 2**32, 2**63 - 1]], ], ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_solarize_threshold_within_bound(threshold, dtype, device): make_img = torch.rand if dtype.is_floating_point else partial(torch.randint, 0, torch.iinfo(dtype).max) img = make_img((3, 12, 23), dtype=dtype, device=device) @@ -804,7 +730,7 @@ def test_solarize_threshold_within_bound(threshold, dtype, device): (torch.int64, 2**64), ], ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_solarize_threshold_above_bound(threshold, dtype, device): make_img = torch.rand if dtype.is_floating_point else partial(torch.randint, 0, torch.iinfo(dtype).max) img = make_img((3, 12, 23), dtype=dtype, device=device) @@ -812,7 +738,7 @@ def test_solarize_threshold_above_bound(threshold, dtype, device): F_t.solarize(img, threshold) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"sharpness_factor": f} for f in [0.2, 0.5, 1.0, 1.5, 2.0]]) @pytest.mark.parametrize("channels", [1, 3]) @@ -828,7 +754,7 @@ def test_adjust_sharpness(device, dtype, config, channels): ) 
-@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("channels", [1, 3]) def test_autocontrast(device, dtype, channels): @@ -837,7 +763,7 @@ def test_autocontrast(device, dtype, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("channels", [1, 3]) def test_autocontrast_equal_minmax(device, dtype, channels): @@ -849,7 +775,7 @@ def test_autocontrast_equal_minmax(device, dtype, channels): assert (F.autocontrast(a)[0] == F.autocontrast(a[0])).all() -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("channels", [1, 3]) def test_equalize(device, channels): torch.use_deterministic_algorithms(False) @@ -866,7 +792,7 @@ def test_equalize(device, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"contrast_factor": f} for f in [0.2, 0.5, 1.0, 1.5, 2.0]]) @pytest.mark.parametrize("channels", [1, 3]) @@ -876,7 +802,7 @@ def test_adjust_contrast(device, dtype, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"saturation_factor": f} for f in [0.5, 0.75, 1.0, 1.5, 2.0]]) @pytest.mark.parametrize("channels", [1, 3]) @@ -886,7 +812,7 @@ def test_adjust_saturation(device, dtype, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"hue_factor": f} for f in [-0.45, -0.25, 0.0, 0.25, 0.45]]) @pytest.mark.parametrize("channels", [1, 3]) @@ -896,7 +822,7 @@ def test_adjust_hue(device, dtype, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dtype", (None, torch.float32, torch.float64)) @pytest.mark.parametrize("config", [{"gamma": g1, "gain": g2} for g1, g2 in zip([0.8, 1.0, 1.2], [0.7, 1.0, 1.3])]) @pytest.mark.parametrize("channels", [1, 3]) @@ -912,7 +838,7 @@ def test_adjust_gamma(device, dtype, config, channels): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) @pytest.mark.parametrize("pad", [2, [3], [0, 3], (3, 3), [4, 2, 4, 3]]) @pytest.mark.parametrize( @@ -962,7 +888,7 @@ def test_pad(device, dt, pad, config): _test_fn_on_batch(batch_tensors, F.pad, padding=script_pad, **config) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("mode", [NEAREST, NEAREST_EXACT, BILINEAR, BICUBIC]) def test_resized_crop(device, mode): # test values of F.resized_crop in several cases: @@ -997,7 +923,7 @@ def test_resized_crop(device, mode): ) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "func, args", [ @@ -1030,7 
+956,7 @@ def test_assert_image_tensor(device, func, args): func(tensor, *args) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_vflip(device): script_vflip = torch.jit.script(F.vflip) @@ -1047,7 +973,7 @@ def test_vflip(device): _test_fn_on_batch(batch_tensors, F.vflip) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_hflip(device): script_hflip = torch.jit.script(F.hflip) @@ -1064,7 +990,7 @@ def test_hflip(device): _test_fn_on_batch(batch_tensors, F.hflip) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "top, left, height, width", [ @@ -1093,7 +1019,7 @@ def test_crop(device, top, left, height, width): _test_fn_on_batch(batch_tensors, F.crop, top=top, left=left, height=height, width=width) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("image_size", ("small", "large")) @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64, torch.float16]) @pytest.mark.parametrize("ksize", [(3, 3), [3, 5], (23, 23)]) @@ -1147,7 +1073,7 @@ def test_gaussian_blur(device, image_size, dt, ksize, sigma, fn): torch.testing.assert_close(out, true_out, rtol=0.0, atol=1.0, msg=f"{ksize}, {sigma}") -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_hsv2rgb(device): scripted_fn = torch.jit.script(F_t._hsv2rgb) shape = (3, 100, 150) @@ -1178,7 +1104,7 @@ def test_hsv2rgb(device): _test_fn_on_batch(batch_tensors, F_t._hsv2rgb) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_rgb2hsv(device): scripted_fn = torch.jit.script(F_t._rgb2hsv) shape = (3, 150, 100) @@ -1217,7 +1143,7 @@ def test_rgb2hsv(device): _test_fn_on_batch(batch_tensors, F_t._rgb2hsv) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("num_output_channels", (3, 1)) def test_rgb_to_grayscale(device, num_output_channels): script_rgb_to_grayscale = torch.jit.script(F.rgb_to_grayscale) @@ -1236,7 +1162,7 @@ def test_rgb_to_grayscale(device, num_output_channels): _test_fn_on_batch(batch_tensors, F.rgb_to_grayscale, num_output_channels=num_output_channels) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_center_crop(device): script_center_crop = torch.jit.script(F.center_crop) @@ -1254,7 +1180,7 @@ def test_center_crop(device): _test_fn_on_batch(batch_tensors, F.center_crop, output_size=[10, 11]) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_five_crop(device): script_five_crop = torch.jit.script(F.five_crop) @@ -1288,7 +1214,7 @@ def test_five_crop(device): assert_equal(transformed_batch, s_transformed_batch) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_ten_crop(device): script_ten_crop = torch.jit.script(F.ten_crop) @@ -1334,7 +1260,7 @@ def test_elastic_transform_asserts(): _ = F.elastic_transform(img_tensor, displacement=torch.rand(1, 2)) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR, BICUBIC]) @pytest.mark.parametrize("dt", [None, torch.float32, 
torch.float64, torch.float16]) @pytest.mark.parametrize( diff --git a/test/test_image.py b/test/test_image.py index 4c210ea7eef..b08dc2026d4 100644 --- a/test/test_image.py +++ b/test/test_image.py @@ -32,6 +32,7 @@ DAMAGED_PNG = os.path.join(IMAGE_ROOT, "damaged_png") ENCODE_JPEG = os.path.join(IMAGE_ROOT, "encode_jpeg") INTERLACED_PNG = os.path.join(IMAGE_ROOT, "interlaced_png") +TOOSMALL_PNG = os.path.join(IMAGE_ROOT, "toosmall_png") IS_WINDOWS = sys.platform in ("win32", "cygwin") PILLOW_VERSION = tuple(int(x) for x in PILLOW_VERSION.split(".")) @@ -193,6 +194,8 @@ def test_decode_png_errors(): decode_png(torch.randint(3, 5, (300,), dtype=torch.uint8)) with pytest.raises(RuntimeError, match="Out of bound read in decode_png"): decode_png(read_file(os.path.join(DAMAGED_PNG, "sigsegv.png"))) + with pytest.raises(RuntimeError, match="Content is too small for png"): + decode_png(read_file(os.path.join(TOOSMALL_PNG, "heapbof.png"))) @pytest.mark.parametrize( diff --git a/test/test_models.py b/test/test_models.py index f6eeb7c28c8..67eb2115c85 100644 --- a/test/test_models.py +++ b/test/test_models.py @@ -15,7 +15,7 @@ import torch.fx import torch.nn as nn from _utils_internal import get_relative_path -from common_utils import cpu_and_gpu, freeze_rng_state, map_nested_tensor_object, needs_cuda, set_rng_seed +from common_utils import cpu_and_cuda, freeze_rng_state, map_nested_tensor_object, needs_cuda, set_rng_seed from PIL import Image from torchvision import models, transforms from torchvision.models import get_model_builder, list_models @@ -676,14 +676,14 @@ def vitc_b_16(**kwargs: Any): @pytest.mark.parametrize("model_fn", [vitc_b_16]) -@pytest.mark.parametrize("dev", cpu_and_gpu()) +@pytest.mark.parametrize("dev", cpu_and_cuda()) def test_vitc_models(model_fn, dev): test_classification_model(model_fn, dev) @disable_tf32() # see: https://github.com/pytorch/vision/issues/7618 @pytest.mark.parametrize("model_fn", list_model_fns(models)) -@pytest.mark.parametrize("dev", cpu_and_gpu()) +@pytest.mark.parametrize("dev", cpu_and_cuda()) def test_classification_model(model_fn, dev): set_rng_seed(0) defaults = { @@ -726,7 +726,7 @@ def test_classification_model(model_fn, dev): @pytest.mark.parametrize("model_fn", list_model_fns(models.segmentation)) -@pytest.mark.parametrize("dev", cpu_and_gpu()) +@pytest.mark.parametrize("dev", cpu_and_cuda()) def test_segmentation_model(model_fn, dev): set_rng_seed(0) defaults = { @@ -791,7 +791,7 @@ def check_out(out): @pytest.mark.parametrize("model_fn", list_model_fns(models.detection)) -@pytest.mark.parametrize("dev", cpu_and_gpu()) +@pytest.mark.parametrize("dev", cpu_and_cuda()) def test_detection_model(model_fn, dev): set_rng_seed(0) defaults = { @@ -923,7 +923,7 @@ def test_detection_model_validation(model_fn): @pytest.mark.parametrize("model_fn", list_model_fns(models.video)) -@pytest.mark.parametrize("dev", cpu_and_gpu()) +@pytest.mark.parametrize("dev", cpu_and_cuda()) def test_video_model(model_fn, dev): set_rng_seed(0) # the default input shape is diff --git a/test/test_ops.py b/test/test_ops.py index 463ebb333ff..b993bce65a2 100644 --- a/test/test_ops.py +++ b/test/test_ops.py @@ -10,7 +10,7 @@ import torch import torch.fx import torch.nn.functional as F -from common_utils import assert_equal, cpu_and_gpu, needs_cuda +from common_utils import assert_equal, cpu_and_cuda, needs_cuda from PIL import Image from torch import nn, Tensor from torch.autograd import gradcheck @@ -97,7 +97,7 @@ def forward(self, imgs: Tensor, boxes: List[Tensor]) -> 
Tensor:
 class RoIOpTester(ABC):
     dtype = torch.float64
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     def test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, deterministic=False, **kwargs):
         x_dtype = self.dtype if x_dtype is None else x_dtype
@@ -126,7 +126,7 @@ def test_forward(self, device, contiguous, x_dtype=None, rois_dtype=None, determ
         tol = 1e-3 if (x_dtype is torch.half or rois_dtype is torch.half) else 1e-5
         torch.testing.assert_close(gt_y.to(y), y, rtol=tol, atol=tol)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_is_leaf_node(self, device):
         op_obj = self.make_obj(wrap=True).to(device=device)
         graph_node_names = get_graph_node_names(op_obj)
@@ -135,7 +135,7 @@ def test_is_leaf_node(self, device):
         assert len(graph_node_names[0]) == len(graph_node_names[1])
         assert len(graph_node_names[0]) == 1 + op_obj.n_inputs
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_torch_fx_trace(self, device, x_dtype=torch.float, rois_dtype=torch.float):
         op_obj = self.make_obj().to(device=device)
         graph_module = torch.fx.symbolic_trace(op_obj)
@@ -155,7 +155,7 @@ def test_torch_fx_trace(self, device, x_dtype=torch.float, rois_dtype=torch.floa
         torch.testing.assert_close(output_gt, output_fx, rtol=tol, atol=tol)
 
     @pytest.mark.parametrize("seed", range(10))
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     def test_backward(self, seed, device, contiguous, deterministic=False):
         torch.random.manual_seed(seed)
@@ -418,7 +418,7 @@ def test_boxes_shape(self):
         self._helper_boxes_shape(ops.roi_align)
 
     @pytest.mark.parametrize("aligned", (True, False))
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("deterministic", (True, False))
     def test_forward(self, device, contiguous, deterministic, aligned, x_dtype=None, rois_dtype=None):
@@ -450,7 +450,7 @@ def test_autocast(self, aligned, deterministic, x_dtype, rois_dtype):
         )
 
     @pytest.mark.parametrize("seed", range(10))
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("deterministic", (True, False))
     def test_backward(self, seed, device, contiguous, deterministic):
@@ -612,7 +612,7 @@ def test_msroialign_repr(self):
         )
         assert repr(t) == expected_string
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_is_leaf_node(self, device):
         op_obj = self.make_obj(wrap=True).to(device=device)
         graph_node_names = get_graph_node_names(op_obj)
@@ -885,7 +885,7 @@ def make_obj(self, in_channels=6, out_channels=2, kernel_size=(3, 2), groups=2,
         )
         return DeformConvModuleWrapper(obj) if wrap else obj
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_is_leaf_node(self, device):
         op_obj = self.make_obj(wrap=True).to(device=device)
         graph_node_names = get_graph_node_names(op_obj)
@@ -894,7 +894,7 @@ def test_is_leaf_node(self, device):
         assert len(graph_node_names[0]) == len(graph_node_names[1])
         assert len(graph_node_names[0]) == 1 + op_obj.n_inputs
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("batch_sz", (0, 33))
     def test_forward(self, device, contiguous, batch_sz, dtype=None):
@@ -946,7 +946,7 @@ def test_wrong_sizes(self):
             wrong_mask = torch.rand_like(mask[:, :2])
             layer(x, offset, wrong_mask)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("contiguous", (True, False))
     @pytest.mark.parametrize("batch_sz", (0, 33))
     def test_backward(self, device, contiguous, batch_sz):
@@ -1411,7 +1411,7 @@ def assert_empty_loss(iou_fn, dtype, device):
 
 class TestGeneralizedBoxIouLoss:
     # We refer to original test: https://github.com/facebookresearch/fvcore/blob/main/tests/test_giou_loss.py
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_giou_loss(self, dtype, device):
         box1, box2, box3, box4, box1s, box2s = get_boxes(dtype, device)
@@ -1439,7 +1439,7 @@ def test_giou_loss(self, dtype, device):
         with pytest.raises(ValueError, match="Invalid"):
             ops.generalized_box_iou_loss(box1s, box2s, reduction="xyz")
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_empty_inputs(self, dtype, device):
         assert_empty_loss(ops.generalized_box_iou_loss, dtype, device)
@@ -1447,7 +1447,7 @@ def test_empty_inputs(self, dtype, device):
 
 
 class TestCompleteBoxIouLoss:
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_ciou_loss(self, dtype, device):
         box1, box2, box3, box4, box1s, box2s = get_boxes(dtype, device)
@@ -1461,14 +1461,14 @@ def test_ciou_loss(self, dtype, device):
         with pytest.raises(ValueError, match="Invalid"):
             ops.complete_box_iou_loss(box1s, box2s, reduction="xyz")
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_empty_inputs(self, dtype, device):
         assert_empty_loss(ops.complete_box_iou_loss, dtype, device)
 
 
 class TestDistanceBoxIouLoss:
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_distance_iou_loss(self, dtype, device):
         box1, box2, box3, box4, box1s, box2s = get_boxes(dtype, device)
@@ -1483,7 +1483,7 @@ def test_distance_iou_loss(self, dtype, device):
         with pytest.raises(ValueError, match="Invalid"):
             ops.distance_box_iou_loss(box1s, box2s, reduction="xyz")
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_empty_distance_iou_inputs(self, dtype, device):
         assert_empty_loss(ops.distance_box_iou_loss, dtype, device)
@@ -1528,7 +1528,7 @@ def generate_tensor_with_range_type(shape, range_type, **kwargs):
 
     @pytest.mark.parametrize("alpha", [-1.0, 0.0, 0.58, 1.0])
     @pytest.mark.parametrize("gamma", [0, 2])
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     @pytest.mark.parametrize("seed", [0, 1])
     def test_correct_ratio(self, alpha, gamma, device, dtype, seed):
@@ -1557,7 +1557,7 @@ def test_correct_ratio(self, alpha, gamma, device, dtype, seed):
         torch.testing.assert_close(correct_ratio, loss_ratio, atol=tol, rtol=tol)
 
     @pytest.mark.parametrize("reduction", ["mean", "sum"])
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     @pytest.mark.parametrize("seed", [2, 3])
     def test_equal_ce_loss(self, reduction, device, dtype, seed):
@@ -1584,7 +1584,7 @@ def test_equal_ce_loss(self, reduction, device, dtype, seed):
     @pytest.mark.parametrize("alpha", [-1.0, 0.0, 0.58, 1.0])
     @pytest.mark.parametrize("gamma", [0, 2])
     @pytest.mark.parametrize("reduction", ["none", "mean", "sum"])
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     @pytest.mark.parametrize("seed", [4, 5])
     def test_jit(self, alpha, gamma, reduction, device, dtype, seed):
@@ -1600,7 +1600,7 @@ def test_jit(self, alpha, gamma, reduction, device, dtype, seed):
         torch.testing.assert_close(focal_loss, scripted_focal_loss, rtol=tol, atol=tol)
 
     # Raise ValueError for anonymous reduction mode
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dtype", [torch.float32, torch.half])
     def test_reduction_mode(self, device, dtype, reduction="xyz"):
         if device == "cpu" and dtype is torch.half:
diff --git a/test/test_prototype_datapoints.py b/test/test_prototype_datapoints.py
deleted file mode 100644
index 04e3cd67f96..00000000000
--- a/test/test_prototype_datapoints.py
+++ /dev/null
@@ -1,133 +0,0 @@
-import pytest
-import torch
-
-from torchvision.prototype import datapoints as proto_datapoints
-
-
-@pytest.mark.parametrize(
-    ("data", "input_requires_grad", "expected_requires_grad"),
-    [
-        ([0.0], None, False),
-        ([0.0], False, False),
-        ([0.0], True, True),
-        (torch.tensor([0.0], requires_grad=False), None, False),
-        (torch.tensor([0.0], requires_grad=False), False, False),
-        (torch.tensor([0.0], requires_grad=False), True, True),
-        (torch.tensor([0.0], requires_grad=True), None, True),
-        (torch.tensor([0.0], requires_grad=True), False, False),
-        (torch.tensor([0.0], requires_grad=True), True, True),
-    ],
-)
-def test_new_requires_grad(data, input_requires_grad, expected_requires_grad):
-    datapoint = proto_datapoints.Label(data, requires_grad=input_requires_grad)
-    assert datapoint.requires_grad is expected_requires_grad
-
-
-def test_isinstance():
-    assert isinstance(
-        proto_datapoints.Label([0, 1, 0], categories=["foo", "bar"]),
-        torch.Tensor,
-    )
-
-
-def test_wrapping_no_copy():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    assert label.data_ptr() == tensor.data_ptr()
-
-
-def test_to_wrapping():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    label_to = label.to(torch.int32)
-
-    assert type(label_to) is proto_datapoints.Label
-    assert label_to.dtype is torch.int32
-    assert label_to.categories is label.categories
-
-
-def test_to_datapoint_reference():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"]).to(torch.int32)
-
-    tensor_to = tensor.to(label)
-
-    assert type(tensor_to) is torch.Tensor
-    assert tensor_to.dtype is torch.int32
-
-
-def test_clone_wrapping():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    label_clone = label.clone()
-
-    assert type(label_clone) is proto_datapoints.Label
-    assert label_clone.data_ptr() != label.data_ptr()
-    assert label_clone.categories is label.categories
-
-
-def test_requires_grad__wrapping():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.float32)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    assert not label.requires_grad
-
-    label_requires_grad = label.requires_grad_(True)
-
-    assert type(label_requires_grad) is proto_datapoints.Label
-    assert label.requires_grad
-    assert label_requires_grad.requires_grad
-
-
-def test_other_op_no_wrapping():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    # any operation besides .to() and .clone() will do here
-    output = label * 2
-
-    assert type(output) is torch.Tensor
-
-
-@pytest.mark.parametrize(
-    "op",
-    [
-        lambda t: t.numpy(),
-        lambda t: t.tolist(),
-        lambda t: t.max(dim=-1),
-    ],
-)
-def test_no_tensor_output_op_no_wrapping(op):
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    output = op(label)
-
-    assert type(output) is not proto_datapoints.Label
-
-
-def test_inplace_op_no_wrapping():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    output = label.add_(0)
-
-    assert type(output) is torch.Tensor
-    assert type(label) is proto_datapoints.Label
-
-
-def test_wrap_like():
-    tensor = torch.tensor([0, 1, 0], dtype=torch.int64)
-    label = proto_datapoints.Label(tensor, categories=["foo", "bar"])
-
-    # any operation besides .to() and .clone() will do here
-    output = label * 2
-
-    label_new = proto_datapoints.Label.wrap_like(label, output)
-
-    assert type(label_new) is proto_datapoints.Label
-    assert label_new.data_ptr() == output.data_ptr()
-    assert label_new.categories is label.categories
diff --git a/test/test_prototype_models.py b/test/test_prototype_models.py
index 6d9f22c1543..d32df68f1f4 100644
--- a/test/test_prototype_models.py
+++ b/test/test_prototype_models.py
@@ -1,13 +1,13 @@
 import pytest
 import test_models as TM
 import torch
-from common_utils import cpu_and_gpu, set_rng_seed
+from common_utils import cpu_and_cuda, set_rng_seed
 from torchvision.prototype import models
 
 
 @pytest.mark.parametrize("model_fn", (models.depth.stereo.raft_stereo_base,))
 @pytest.mark.parametrize("model_mode", ("standard", "scripted"))
-@pytest.mark.parametrize("dev", cpu_and_gpu())
+@pytest.mark.parametrize("dev", cpu_and_cuda())
 def test_raft_stereo(model_fn, model_mode, dev):
     # A simple test to make sure the model can do forward pass and jit scriptable
     set_rng_seed(0)
@@ -40,7 +40,7 @@ def test_raft_stereo(model_fn, model_mode, dev):
 
 @pytest.mark.parametrize("model_fn", (models.depth.stereo.crestereo_base,))
 @pytest.mark.parametrize("model_mode", ("standard", "scripted"))
-@pytest.mark.parametrize("dev", cpu_and_gpu())
+@pytest.mark.parametrize("dev", cpu_and_cuda())
 def test_crestereo(model_fn, model_mode, dev):
     set_rng_seed(0)
diff --git a/test/test_prototype_transforms.py b/test/test_prototype_transforms.py
index 255c3b5c32f..c574979e22c 100644
--- a/test/test_prototype_transforms.py
+++ b/test/test_prototype_transforms.py
@@ -216,7 +216,7 @@ def test__get_params(self, mocker):
         flat_inputs = [
             make_image(size=spatial_size, color_space="RGB"),
-            make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=batch_shape),
+            make_bounding_box(format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=batch_shape),
         ]
 
         params = transform._get_params(flat_inputs)
@@ -312,9 +312,9 @@ def test__transform_culling(self, mocker):
         )
 
         bounding_boxes = make_bounding_box(
-            format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,)
+            format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
         )
-        masks = make_detection_mask(size=spatial_size, extra_dims=(batch_size,))
+        masks = make_detection_mask(size=spatial_size, batch_dims=(batch_size,))
         labels = make_label(extra_dims=(batch_size,))
 
         transform = transforms.FixedSizeCrop((-1, -1))
@@ -350,7 +350,7 @@ def test__transform_bounding_box_clamping(self, mocker):
         )
 
         bounding_box = make_bounding_box(
-            format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(batch_size,)
+            format=BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(batch_size,)
         )
         mock = mocker.patch("torchvision.prototype.transforms._geometry.F.clamp_bounding_box")
@@ -496,7 +496,7 @@ def make_datapoints():
     pil_image = to_image_pil(make_image(size=size, color_space="RGB"))
     target = {
-        "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+        "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
         "labels": make_label(extra_dims=(num_objects,), categories=80),
         "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
     }
@@ -505,7 +505,7 @@ def make_datapoints():
     tensor_image = torch.Tensor(make_image(size=size, color_space="RGB"))
     target = {
-        "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+        "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
         "labels": make_label(extra_dims=(num_objects,), categories=80),
         "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
     }
@@ -514,7 +514,7 @@ def make_datapoints():
     datapoint_image = make_image(size=size, color_space="RGB")
     target = {
-        "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+        "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
         "labels": make_label(extra_dims=(num_objects,), categories=80),
         "masks": make_detection_mask(size=size, num_objects=num_objects, dtype=torch.long),
     }
diff --git a/test/test_transforms.py b/test/test_transforms.py
index 41075c6514a..7581bf33220 100644
--- a/test/test_transforms.py
+++ b/test/test_transforms.py
@@ -952,33 +952,6 @@ def test_adjust_contrast():
     torch.testing.assert_close(y_np, y_ans)
 
 
-@pytest.mark.skipif(Image.__version__ >= "7", reason="Temporarily disabled")
-def test_adjust_saturation():
-    x_shape = [2, 2, 3]
-    x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
-    x_np = np.array(x_data, dtype=np.uint8).reshape(x_shape)
-    x_pil = Image.fromarray(x_np, mode="RGB")
-
-    # test 0
-    y_pil = F.adjust_saturation(x_pil, 1)
-    y_np = np.array(y_pil)
-    torch.testing.assert_close(y_np, x_np)
-
-    # test 1
-    y_pil = F.adjust_saturation(x_pil, 0.5)
-    y_np = np.array(y_pil)
-    y_ans = [2, 4, 8, 87, 128, 173, 39, 25, 138, 133, 215, 88]
-    y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
-    torch.testing.assert_close(y_np, y_ans)
-
-    # test 2
-    y_pil = F.adjust_saturation(x_pil, 2)
-    y_np = np.array(y_pil)
-    y_ans = [0, 6, 22, 0, 149, 255, 32, 0, 255, 4, 255, 0]
-    y_ans = np.array(y_ans, dtype=np.uint8).reshape(x_shape)
-    torch.testing.assert_close(y_np, y_ans)
-
-
 def test_adjust_hue():
     x_shape = [2, 2, 3]
     x_data = [0, 5, 13, 54, 135, 226, 37, 8, 234, 90, 255, 1]
diff --git a/test/test_transforms_tensor.py b/test/test_transforms_tensor.py
index 077a12af490..e2ab5673f1e 100644
--- a/test/test_transforms_tensor.py
+++ b/test/test_transforms_tensor.py
@@ -12,7 +12,7 @@
     _create_data,
     _create_data_batch,
     assert_equal,
-    cpu_and_gpu,
+    cpu_and_cuda,
     float_dtypes,
     get_tmp_dir,
     int_dtypes,
@@ -105,7 +105,7 @@ def _test_fn_save_load(fn, tmpdir):
         _ = torch.jit.load(p)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "func,method,fn_kwargs,match_kwargs",
     [
@@ -130,7 +130,7 @@ def test_random(func, method, device, channels, fn_kwargs, match_kwargs):
 
 
 @pytest.mark.parametrize("seed", range(10))
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("channels", [1, 3])
 class TestColorJitter:
     @pytest.fixture(autouse=True)
@@ -206,7 +206,7 @@ def test_color_jitter_all(self, device, channels):
         )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("m", ["constant", "edge", "reflect", "symmetric"])
 @pytest.mark.parametrize("mul", [1, -1])
 def test_pad(m, mul, device):
@@ -229,7 +229,7 @@ def test_pad(m, mul, device):
     _test_op(F.pad, T.Pad, device=device, fn_kwargs=fn_kwargs, meth_kwargs=meth_kwargs)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_crop(device):
     fn_kwargs = {"top": 2, "left": 3, "height": 4, "width": 5}
     # Test transforms.RandomCrop with size and padding as tuple
@@ -257,7 +257,7 @@ def test_crop(device):
     _test_functional_op(F.crop, fn_kwargs=fn_kwargs, device=device)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "padding_config",
     [
@@ -283,7 +283,7 @@ def test_random_crop_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_center_crop(device, tmpdir):
     fn_kwargs = {"output_size": (4, 5)}
     meth_kwargs = {"size": (4, 5)}
@@ -313,7 +313,7 @@ def test_center_crop_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "fn, method, out_length",
     [
@@ -380,7 +380,7 @@ def test_resize_int(self, size):
         assert y.shape[1] == size
         assert y.shape[2] == int(size * 46 / 32)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("dt", [None, torch.float32, torch.float64])
     @pytest.mark.parametrize("size", [[32], [32, 32], (32, 32), [34, 35]])
     @pytest.mark.parametrize("max_size", [None, 35, 1000])
@@ -404,7 +404,7 @@ def test_resize_save_load(self, tmpdir):
         fn = T.Resize(size=[32], antialias=True)
         _test_fn_save_load(fn, tmpdir)
 
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("scale", [(0.7, 1.2), [0.7, 1.2]])
     @pytest.mark.parametrize("ratio", [(0.75, 1.333), [0.75, 1.333]])
     @pytest.mark.parametrize("size", [(32,), [44], [32], [32, 32], (32, 32), [44, 55]])
@@ -460,42 +460,42 @@ def test_random_affine_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("shear", [15, 10.0, (5.0, 10.0), [-15, 15], [-10.0, 10.0, -11.0, 11.0]])
 def test_random_affine_shear(device, interpolation, shear):
     _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, shear=shear)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("scale", [(0.7, 1.2), [0.7, 1.2]])
 def test_random_affine_scale(device, interpolation, scale):
     _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, scale=scale)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("translate", [(0.1, 0.2), [0.2, 0.1]])
 def test_random_affine_translate(device, interpolation, translate):
     _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, translate=translate)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("degrees", [45, 35.0, (-45, 45), [-90.0, 90.0]])
 def test_random_affine_degrees(device, interpolation, degrees):
     _test_random_affine_helper(device, degrees=degrees, interpolation=interpolation)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
 def test_random_affine_fill(device, interpolation, fill):
     _test_random_affine_helper(device, degrees=0.0, interpolation=interpolation, fill=fill)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("center", [(0, 0), [10, 10], None, (56, 44)])
 @pytest.mark.parametrize("expand", [True, False])
 @pytest.mark.parametrize("degrees", [45, 35.0, (-45, 45), [-90.0, 90.0]])
@@ -517,7 +517,7 @@ def test_random_rotate_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("distortion_scale", np.linspace(0.1, 1.0, num=20))
 @pytest.mark.parametrize("interpolation", [NEAREST, BILINEAR])
 @pytest.mark.parametrize("fill", [85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
@@ -537,7 +537,7 @@ def test_random_perspective_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "Klass, meth_kwargs",
     [(T.Grayscale, {"num_output_channels": 1}), (T.Grayscale, {"num_output_channels": 3}), (T.RandomGrayscale, {})],
@@ -547,7 +547,7 @@ def test_to_grayscale(device, Klass, meth_kwargs):
     _test_class_op(Klass, meth_kwargs=meth_kwargs, test_exact_match=False, device=device, tol=tol, agg_method="max")
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("in_dtype", int_dtypes() + float_dtypes())
 @pytest.mark.parametrize("out_dtype", int_dtypes() + float_dtypes())
 def test_convert_image_dtype(device, in_dtype, out_dtype):
@@ -578,7 +578,7 @@ def test_convert_image_dtype_save_load(tmpdir):
     _test_fn_save_load(fn, tmpdir)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("policy", [policy for policy in T.AutoAugmentPolicy])
 @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
 def test_autoaugment(device, policy, fill):
@@ -592,7 +592,7 @@ def test_autoaugment(device, policy, fill):
         _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("num_ops", [1, 2, 3])
 @pytest.mark.parametrize("magnitude", [7, 9, 11])
 @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
@@ -607,7 +607,7 @@ def test_randaugment(device, num_ops, magnitude, fill):
         _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
 def test_trivialaugmentwide(device, fill):
     tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device)
@@ -620,7 +620,7 @@ def test_trivialaugmentwide(device, fill):
         _test_transform_vs_scripted_on_batch(transform, s_transform, batch_tensors)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("fill", [None, 85, (10, -10, 10), 0.7, [0.0, 0.0, 0.0], [1], 1])
 def test_augmix(device, fill):
     tensor = torch.randint(0, 256, size=(3, 44, 56), dtype=torch.uint8, device=device)
@@ -686,7 +686,7 @@ def shear(pil_img, level, mode, resample):
     _assert_approx_equal_tensor_to_pil(out, expected_out)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "config",
     [
@@ -724,7 +724,7 @@ def test_random_erasing_with_invalid_data():
         random_erasing(img)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_normalize(device, tmpdir):
     fn = T.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
     tensor, _ = _create_data(26, 34, device=device)
@@ -743,7 +743,7 @@ def test_normalize(device, tmpdir):
     scripted_fn.save(os.path.join(tmpdir, "t_norm.pt"))
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_linear_transformation(device, tmpdir):
     c, h, w = 3, 24, 32
 
@@ -769,7 +769,7 @@ def test_linear_transformation(device, tmpdir):
     scripted_fn.save(os.path.join(tmpdir, "t_norm.pt"))
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_compose(device):
     tensor, _ = _create_data(26, 34, device=device)
     tensor = tensor.to(dtype=torch.float32) / 255.0
@@ -797,7 +797,7 @@ def test_compose(device):
         torch.jit.script(t)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_random_apply(device):
     tensor, _ = _create_data(26, 34, device=device)
     tensor = tensor.to(dtype=torch.float32) / 255.0
@@ -839,7 +839,7 @@ def test_random_apply(device):
         torch.jit.script(transforms)
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "meth_kwargs",
     [
@@ -877,7 +877,7 @@ def test_gaussian_blur(device, channels, meth_kwargs):
     )
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize(
     "fill",
     [
diff --git a/test/test_transforms_v2.py b/test/test_transforms_v2.py
index bfb403c7abe..755a7b0350c 100644
--- a/test/test_transforms_v2.py
+++ b/test/test_transforms_v2.py
@@ -16,7 +16,7 @@ from common_utils import (
     assert_equal,
     assert_run_python_script,
-    cpu_and_gpu,
+    cpu_and_cuda,
     make_bounding_box,
     make_bounding_boxes,
     make_detection_mask,
@@ -29,7 +29,7 @@
 from torch.utils._pytree import tree_flatten, tree_unflatten
 from torchvision import datapoints
 from torchvision.ops.boxes import box_iou
-from torchvision.transforms.functional import InterpolationMode, pil_to_tensor, to_pil_image
+from torchvision.transforms.functional import InterpolationMode, to_pil_image
 from torchvision.transforms.v2 import functional as F
 from torchvision.transforms.v2.utils import check_type, is_simple_tensor, query_chw
 
@@ -199,7 +199,7 @@ class TestSmoke:
             next(make_vanilla_tensor_images()),
         ],
     )
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_common(self, transform, adapter, container_type, image_or_video, device):
         spatial_size = F.get_spatial_size(image_or_video)
         input = dict(
@@ -208,13 +208,13 @@ def test_common(self, transform, adapter, container_type, image_or_video, device
             video_datapoint=make_video(size=spatial_size),
             image_pil=next(make_pil_images(sizes=[spatial_size], color_spaces=["RGB"])),
             bounding_box_xyxy=make_bounding_box(
-                format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, extra_dims=(3,)
+                format=datapoints.BoundingBoxFormat.XYXY, spatial_size=spatial_size, batch_dims=(3,)
             ),
             bounding_box_xywh=make_bounding_box(
-                format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, extra_dims=(4,)
+                format=datapoints.BoundingBoxFormat.XYWH, spatial_size=spatial_size, batch_dims=(4,)
             ),
             bounding_box_cxcywh=make_bounding_box(
-                format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, extra_dims=(5,)
+                format=datapoints.BoundingBoxFormat.CXCYWH, spatial_size=spatial_size, batch_dims=(5,)
             ),
             bounding_box_degenerate_xyxy=datapoints.BoundingBox(
                 [
@@ -315,7 +315,7 @@ def test_common(self, transform, adapter, container_type, image_or_video, device
             ],
             dtypes=[torch.uint8],
             extra_dims=[(), (4,)],
-            **(dict(num_frames=["random"]) if fn is make_videos else dict()),
+            **(dict(num_frames=[3]) if fn is make_videos else dict()),
         )
         for fn in [
             make_images,
@@ -463,112 +463,6 @@ def was_applied(output, inpt):
         assert transform.was_applied(output, input)
 
 
-@pytest.mark.parametrize("p", [0.0, 1.0])
-class TestRandomHorizontalFlip:
-    def input_expected_image_tensor(self, p, dtype=torch.float32):
-        input = torch.tensor([[[0, 1], [0, 1]], [[1, 0], [1, 0]]], dtype=dtype)
-        expected = torch.tensor([[[1, 0], [1, 0]], [[0, 1], [0, 1]]], dtype=dtype)
-
-        return input, expected if p == 1 else input
-
-    def test_simple_tensor(self, p):
-        input, expected = self.input_expected_image_tensor(p)
-        transform = transforms.RandomHorizontalFlip(p=p)
-
-        actual = transform(input)
-
-        assert_equal(expected, actual)
-
-    def test_pil_image(self, p):
-        input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8)
-        transform = transforms.RandomHorizontalFlip(p=p)
-
-        actual = transform(to_pil_image(input))
-
-        assert_equal(expected, pil_to_tensor(actual))
-
-    def test_datapoints_image(self, p):
-        input, expected = self.input_expected_image_tensor(p)
-        transform = transforms.RandomHorizontalFlip(p=p)
-
-        actual = transform(datapoints.Image(input))
-
-        assert_equal(datapoints.Image(expected), actual)
-
-    def test_datapoints_mask(self, p):
-        input, expected = self.input_expected_image_tensor(p)
-        transform = transforms.RandomHorizontalFlip(p=p)
-
-        actual = transform(datapoints.Mask(input))
-
-        assert_equal(datapoints.Mask(expected), actual)
-
-    def test_datapoints_bounding_box(self, p):
-        input = datapoints.BoundingBox([0, 0, 5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10))
-        transform = transforms.RandomHorizontalFlip(p=p)
-
-        actual = transform(input)
-
-        expected_image_tensor = torch.tensor([5, 0, 10, 5]) if p == 1.0 else input
-        expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor)
-        assert_equal(expected, actual)
-        assert actual.format == expected.format
-        assert actual.spatial_size == expected.spatial_size
-
-
-@pytest.mark.parametrize("p", [0.0, 1.0])
-class TestRandomVerticalFlip:
-    def input_expected_image_tensor(self, p, dtype=torch.float32):
-        input = torch.tensor([[[1, 1], [0, 0]], [[1, 1], [0, 0]]], dtype=dtype)
-        expected = torch.tensor([[[0, 0], [1, 1]], [[0, 0], [1, 1]]], dtype=dtype)
-
-        return input, expected if p == 1 else input
-
-    def test_simple_tensor(self, p):
-        input, expected = self.input_expected_image_tensor(p)
-        transform = transforms.RandomVerticalFlip(p=p)
-
-        actual = transform(input)
-
-        assert_equal(expected, actual)
-
-    def test_pil_image(self, p):
-        input, expected = self.input_expected_image_tensor(p, dtype=torch.uint8)
-        transform = transforms.RandomVerticalFlip(p=p)
-
-        actual = transform(to_pil_image(input))
-
-        assert_equal(expected, pil_to_tensor(actual))
-
-    def test_datapoints_image(self, p):
-        input, expected = self.input_expected_image_tensor(p)
-        transform = transforms.RandomVerticalFlip(p=p)
-
-        actual = transform(datapoints.Image(input))
-
-        assert_equal(datapoints.Image(expected), actual)
-
-    def test_datapoints_mask(self, p):
-        input, expected = self.input_expected_image_tensor(p)
-        transform = transforms.RandomVerticalFlip(p=p)
-
-        actual = transform(datapoints.Mask(input))
-
-        assert_equal(datapoints.Mask(expected), actual)
-
-    def test_datapoints_bounding_box(self, p):
-        input = datapoints.BoundingBox([0, 0, 5, 5], format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(10, 10))
-        transform = transforms.RandomVerticalFlip(p=p)
-
-        actual = transform(input)
-
-        expected_image_tensor = torch.tensor([0, 5, 5, 10]) if p == 1.0 else input
-        expected = datapoints.BoundingBox.wrap_like(input, expected_image_tensor)
-        assert_equal(expected, actual)
-        assert actual.format == expected.format
-        assert actual.spatial_size == expected.spatial_size
-
-
 class TestPad:
     def test_assertions(self):
         with pytest.raises(TypeError, match="Got inappropriate padding arg"):
@@ -704,204 +598,6 @@ def test__transform_image_mask(self, fill, mocker):
         fn.assert_has_calls(calls)
 
 
-class TestRandomRotation:
-    def test_assertions(self):
-        with pytest.raises(ValueError, match="is a single number, it must be positive"):
-            transforms.RandomRotation(-0.7)
-
-        for d in [[-0.7], [-0.7, 0, 0.7]]:
-            with pytest.raises(ValueError, match="degrees should be a sequence of length 2"):
-                transforms.RandomRotation(d)
-
-        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
-            transforms.RandomRotation(12, fill="abc")
-
-        with pytest.raises(TypeError, match="center should be a sequence of length"):
-            transforms.RandomRotation(12, center=12)
-
-        with pytest.raises(ValueError, match="center should be a sequence of length"):
-            transforms.RandomRotation(12, center=[1, 2, 3])
-
-    def test__get_params(self):
-        angle_bound = 34
-        transform = transforms.RandomRotation(angle_bound)
-
-        params = transform._get_params(None)
-        assert -angle_bound <= params["angle"] <= angle_bound
-
-        angle_bounds = [12, 34]
-        transform = transforms.RandomRotation(angle_bounds)
-
-        params = transform._get_params(None)
-        assert angle_bounds[0] <= params["angle"] <= angle_bounds[1]
-
-    @pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)])
-    @pytest.mark.parametrize("expand", [False, True])
-    @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
-    @pytest.mark.parametrize("center", [None, [2.0, 3.0]])
-    def test__transform(self, degrees, expand, fill, center, mocker):
-        interpolation = InterpolationMode.BILINEAR
-        transform = transforms.RandomRotation(
-            degrees, interpolation=interpolation, expand=expand, fill=fill, center=center
-        )
-
-        if isinstance(degrees, (tuple, list)):
-            assert transform.degrees == [float(degrees[0]), float(degrees[1])]
-        else:
-            assert transform.degrees == [float(-degrees), float(degrees)]
-
-        fn = mocker.patch("torchvision.transforms.v2.functional.rotate")
-        inpt = mocker.MagicMock(spec=datapoints.Image)
-        # vfdev-5, Feature Request: let's store params as Transform attribute
-        # This could be also helpful for users
-        # Otherwise, we can mock transform._get_params
-        torch.manual_seed(12)
-        _ = transform(inpt)
-        torch.manual_seed(12)
-        params = transform._get_params(inpt)
-
-        fill = transforms._utils._convert_fill_arg(fill)
-        fn.assert_called_once_with(inpt, **params, interpolation=interpolation, expand=expand, fill=fill, center=center)
-
-    @pytest.mark.parametrize("angle", [34, -87])
-    @pytest.mark.parametrize("expand", [False, True])
-    def test_boundingbox_spatial_size(self, angle, expand):
-        # Specific test for BoundingBox.rotate
-        bbox = datapoints.BoundingBox(
-            torch.tensor([1, 2, 3, 4]), format=datapoints.BoundingBoxFormat.XYXY, spatial_size=(32, 32)
-        )
-        img = datapoints.Image(torch.rand(1, 3, 32, 32))
-
-        out_img = img.rotate(angle, expand=expand)
-        out_bbox = bbox.rotate(angle, expand=expand)
-
-        assert out_img.spatial_size == out_bbox.spatial_size
-
-
-class TestRandomAffine:
-    def test_assertions(self):
-        with pytest.raises(ValueError, match="is a single number, it must be positive"):
-            transforms.RandomAffine(-0.7)
-
-        for d in [[-0.7], [-0.7, 0, 0.7]]:
-            with pytest.raises(ValueError, match="degrees should be a sequence of length 2"):
-                transforms.RandomAffine(d)
-
-        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
-            transforms.RandomAffine(12, fill="abc")
-
-        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
-            transforms.RandomAffine(12, fill="abc")
-
-        for kwargs in [
-            {"center": 12},
-            {"translate": 12},
-            {"scale": 12},
-        ]:
-            with pytest.raises(TypeError, match="should be a sequence of length"):
-                transforms.RandomAffine(12, **kwargs)
-
-        for kwargs in [{"center": [1, 2, 3]}, {"translate": [1, 2, 3]}, {"scale": [1, 2, 3]}]:
-            with pytest.raises(ValueError, match="should be a sequence of length"):
-                transforms.RandomAffine(12, **kwargs)
-
-        with pytest.raises(ValueError, match="translation values should be between 0 and 1"):
-            transforms.RandomAffine(12, translate=[-1.0, 2.0])
-
-        with pytest.raises(ValueError, match="scale values should be positive"):
-            transforms.RandomAffine(12, scale=[-1.0, 2.0])
-
-        with pytest.raises(ValueError, match="is a single number, it must be positive"):
-            transforms.RandomAffine(12, shear=-10)
-
-        for s in [[-0.7], [-0.7, 0, 0.7]]:
-            with pytest.raises(ValueError, match="shear should be a sequence of length 2"):
-                transforms.RandomAffine(12, shear=s)
-
-    @pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)])
-    @pytest.mark.parametrize("translate", [None, [0.1, 0.2]])
-    @pytest.mark.parametrize("scale", [None, [0.7, 1.2]])
-    @pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]])
-    def test__get_params(self, degrees, translate, scale, shear, mocker):
-        image = mocker.MagicMock(spec=datapoints.Image)
-        image.num_channels = 3
-        image.spatial_size = (24, 32)
-        h, w = image.spatial_size
-
-        transform = transforms.RandomAffine(degrees, translate=translate, scale=scale, shear=shear)
-        params = transform._get_params([image])
-
-        if not isinstance(degrees, (list, tuple)):
-            assert -degrees <= params["angle"] <= degrees
-        else:
-            assert degrees[0] <= params["angle"] <= degrees[1]
-
-        if translate is not None:
-            w_max = int(round(translate[0] * w))
-            h_max = int(round(translate[1] * h))
-            assert -w_max <= params["translate"][0] <= w_max
-            assert -h_max <= params["translate"][1] <= h_max
-        else:
-            assert params["translate"] == (0, 0)
-
-        if scale is not None:
-            assert scale[0] <= params["scale"] <= scale[1]
-        else:
-            assert params["scale"] == 1.0
-
-        if shear is not None:
-            if isinstance(shear, float):
-                assert -shear <= params["shear"][0] <= shear
-                assert params["shear"][1] == 0.0
-            elif len(shear) == 2:
-                assert shear[0] <= params["shear"][0] <= shear[1]
-                assert params["shear"][1] == 0.0
-            else:
-                assert shear[0] <= params["shear"][0] <= shear[1]
-                assert shear[2] <= params["shear"][1] <= shear[3]
-        else:
-            assert params["shear"] == (0, 0)
-
-    @pytest.mark.parametrize("degrees", [23, [0, 45], (0, 45)])
-    @pytest.mark.parametrize("translate", [None, [0.1, 0.2]])
-    @pytest.mark.parametrize("scale", [None, [0.7, 1.2]])
-    @pytest.mark.parametrize("shear", [None, 2.0, [5.0, 15.0], [1.0, 2.0, 3.0, 4.0]])
-    @pytest.mark.parametrize("fill", [0, [1, 2, 3], (2, 3, 4)])
-    @pytest.mark.parametrize("center", [None, [2.0, 3.0]])
-    def test__transform(self, degrees, translate, scale, shear, fill, center, mocker):
-        interpolation = InterpolationMode.BILINEAR
-        transform = transforms.RandomAffine(
-            degrees,
-            translate=translate,
-            scale=scale,
-            shear=shear,
-            interpolation=interpolation,
-            fill=fill,
-            center=center,
-        )
-
-        if isinstance(degrees, (tuple, list)):
-            assert transform.degrees == [float(degrees[0]), float(degrees[1])]
-        else:
-            assert transform.degrees == [float(-degrees), float(degrees)]
-
-        fn = mocker.patch("torchvision.transforms.v2.functional.affine")
-        inpt = mocker.MagicMock(spec=datapoints.Image)
-        inpt.num_channels = 3
-        inpt.spatial_size = (24, 32)
-
-        # vfdev-5, Feature Request: let's store params as Transform attribute
-        # This could be also helpful for users
-        # Otherwise, we can mock transform._get_params
-        torch.manual_seed(12)
-        _ = transform(inpt)
-        torch.manual_seed(12)
-        params = transform._get_params([inpt])
-
-        fill = transforms._utils._convert_fill_arg(fill)
-        fn.assert_called_once_with(inpt, **params, interpolation=interpolation, fill=fill, center=center)
-
-
 class TestRandomCrop:
     def test_assertions(self):
         with pytest.raises(ValueError, match="Please provide only two dimensions"):
@@ -1421,7 +1117,7 @@ def test_assertions(self):
 
 
 class TestRandomIoUCrop:
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     @pytest.mark.parametrize("options", [[0.5, 0.9], [2.0]])
     def test__get_params(self, device, options, mocker):
         image = mocker.MagicMock(spec=datapoints.Image)
@@ -1485,7 +1181,7 @@ def test__transform(self, mocker):
         transform = transforms.RandomIoUCrop()
 
         image = datapoints.Image(torch.rand(3, 32, 24))
-        bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), extra_dims=(6,))
+        bboxes = make_bounding_box(format="XYXY", spatial_size=(32, 24), batch_dims=(6,))
         masks = make_detection_mask((32, 24), num_objects=6)
 
         sample = [image, bboxes, masks]
@@ -1768,8 +1464,6 @@ def test_antialias_warning():
     tensor_video = torch.randint(0, 256, size=(2, 3, 10, 10), dtype=torch.uint8)
 
     match = "The default value of the antialias parameter"
-    with pytest.warns(UserWarning, match=match):
-        transforms.Resize((20, 20))(tensor_img)
     with pytest.warns(UserWarning, match=match):
         transforms.RandomResizedCrop((20, 20))(tensor_img)
     with pytest.warns(UserWarning, match=match):
@@ -1779,18 +1473,6 @@
     with pytest.warns(UserWarning, match=match):
         transforms.RandomResize(10, 20)(tensor_img)
 
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize(tensor_img, (20, 20))
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize_image_tensor(tensor_img, (20, 20))
-
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize(tensor_video, (20, 20))
-    with pytest.warns(UserWarning, match=match):
-        transforms.functional.resize_video(tensor_video, (20, 20))
-
-    with pytest.warns(UserWarning, match=match):
-        datapoints.Image(tensor_img).resize((20, 20))
     with pytest.warns(UserWarning, match=match):
         datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20))
 
@@ -1801,27 +1483,17 @@
     with warnings.catch_warnings():
         warnings.simplefilter("error")
-        transforms.Resize((20, 20))(pil_img)
         transforms.RandomResizedCrop((20, 20))(pil_img)
         transforms.ScaleJitter((20, 20))(pil_img)
         transforms.RandomShortestSize((20, 20))(pil_img)
         transforms.RandomResize(10, 20)(pil_img)
-        transforms.functional.resize(pil_img, (20, 20))
 
-        transforms.Resize((20, 20), antialias=True)(tensor_img)
         transforms.RandomResizedCrop((20, 20), antialias=True)(tensor_img)
         transforms.ScaleJitter((20, 20), antialias=True)(tensor_img)
         transforms.RandomShortestSize((20, 20), antialias=True)(tensor_img)
         transforms.RandomResize(10, 20, antialias=True)(tensor_img)
-        transforms.functional.resize(tensor_img, (20, 20), antialias=True)
-        transforms.functional.resize_image_tensor(tensor_img, (20, 20), antialias=True)
-
-        transforms.functional.resize(tensor_video, (20, 20), antialias=True)
-        transforms.functional.resize_video(tensor_video, (20, 20), antialias=True)
-
-        datapoints.Image(tensor_img).resize((20, 20), antialias=True)
         datapoints.Image(tensor_img).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
 
-        datapoints.Video(tensor_video).resize((20, 20), antialias=True)
         datapoints.Video(tensor_video).resized_crop(0, 0, 10, 10, (20, 20), antialias=True)
diff --git a/test/test_transforms_v2_consistency.py b/test/test_transforms_v2_consistency.py
index e541feaf1eb..bf297473bc2 100644
--- a/test/test_transforms_v2_consistency.py
+++ b/test/test_transforms_v2_consistency.py
@@ -22,6 +22,7 @@
     make_image,
     make_images,
     make_segmentation_mask,
+    set_rng_seed,
 )
 from torch import nn
 from torchvision import datapoints, transforms as legacy_transforms
@@ -35,6 +36,12 @@
 DEFAULT_MAKE_IMAGES_KWARGS = dict(color_spaces=["RGB"], extra_dims=[(4,)])
 
 
+@pytest.fixture(autouse=True)
+def fix_rng_seed():
+    set_rng_seed(0)
+    yield
+
+
 class NotScriptableArgsKwargs(ArgsKwargs):
     """
     This class is used to mark parameters that render the transform non-scriptable. They still work in eager mode and
@@ -1083,7 +1090,7 @@ def make_label(extra_dims, categories):
     pil_image = to_image_pil(make_image(size=size, color_space="RGB"))
     target = {
-        "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+        "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
         "labels": make_label(extra_dims=(num_objects,), categories=80),
     }
     if with_mask:
@@ -1091,9 +1098,9 @@ def make_label(extra_dims, categories):
 
     yield (pil_image, target)
 
-    tensor_image = torch.Tensor(make_image(size=size, color_space="RGB"))
+    tensor_image = torch.Tensor(make_image(size=size, color_space="RGB", dtype=torch.float32))
     target = {
-        "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+        "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
         "labels": make_label(extra_dims=(num_objects,), categories=80),
     }
     if with_mask:
@@ -1101,9 +1108,9 @@ def make_label(extra_dims, categories):
 
     yield (tensor_image, target)
 
-    datapoint_image = make_image(size=size, color_space="RGB")
+    datapoint_image = make_image(size=size, color_space="RGB", dtype=torch.float32)
     target = {
-        "boxes": make_bounding_box(spatial_size=size, format="XYXY", extra_dims=(num_objects,), dtype=torch.float),
+        "boxes": make_bounding_box(spatial_size=size, format="XYXY", batch_dims=(num_objects,), dtype=torch.float),
         "labels": make_label(extra_dims=(num_objects,), categories=80),
     }
     if with_mask:
diff --git a/test/test_transforms_v2_functional.py b/test/test_transforms_v2_functional.py
index 60a06f571b1..465cc227107 100644
--- a/test/test_transforms_v2_functional.py
+++ b/test/test_transforms_v2_functional.py
@@ -14,7 +14,7 @@ from common_utils import (
     assert_close,
     cache,
-    cpu_and_gpu,
+    cpu_and_cuda,
     DEFAULT_SQUARE_SPATIAL_SIZE,
     make_bounding_boxes,
     needs_cuda,
@@ -120,7 +120,7 @@ class TestKernels:
         [info for info in KERNEL_INFOS if info.logs_usage],
         args_kwargs_fn=lambda info: info.sample_inputs_fn(),
    )
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_logging(self, spy_on, info, args_kwargs, device):
         spy = spy_on(torch._C._log_api_usage_once)
 
@@ -131,7 +131,7 @@ def test_logging(self, spy_on, info, args_kwargs, device):
 
     @ignore_jit_warning_no_profile
     @sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_scripted_vs_eager(self, test_id, info, args_kwargs, device):
         kernel_eager = info.kernel
         kernel_scripted = script(kernel_eager)
@@ -167,7 +167,7 @@ def _unbatch(self, batch, *, data_dims):
         ]
 
     @sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_batched_vs_single(self, test_id, info, args_kwargs, device):
         (batched_input, *other_args), kwargs = args_kwargs.load(device)
 
@@ -208,7 +208,7 @@ def test_batched_vs_single(self, test_id, info, args_kwargs, device):
     )
 
     @sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_no_inplace(self, info, args_kwargs, device):
         (input, *other_args), kwargs = args_kwargs.load(device)
         input = input.as_subclass(torch.Tensor)
@@ -240,7 +240,7 @@ def test_cuda_vs_cpu(self, test_id, info, args_kwargs):
     )
 
     @sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_dtype_and_device_consistency(self, info, args_kwargs, device):
         (input, *other_args), kwargs = args_kwargs.load(device)
         input = input.as_subclass(torch.Tensor)
@@ -320,7 +320,7 @@ class TestDispatchers:
         DISPATCHER_INFOS,
         args_kwargs_fn=lambda info: info.sample_inputs(),
     )
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_logging(self, spy_on, info, args_kwargs, device):
         spy = spy_on(torch._C._log_api_usage_once)
 
@@ -331,7 +331,7 @@ def test_logging(self, spy_on, info, args_kwargs, device):
 
     @ignore_jit_warning_no_profile
     @image_sample_inputs
-    @pytest.mark.parametrize("device", cpu_and_gpu())
+    @pytest.mark.parametrize("device", cpu_and_cuda())
     def test_scripted_smoke(self, info, args_kwargs, device):
         dispatcher = script(info.dispatcher)
 
@@ -539,6 +539,7 @@ def test_bounding_box_format_consistency(self, info, args_kwargs):
             (F.to_pil_image, F.to_image_pil),
             (F.elastic_transform, F.elastic),
             (F.convert_image_dtype, F.convert_dtype_image_tensor),
+            (F.to_grayscale, F.rgb_to_grayscale),
         ]
     ],
 )
@@ -553,7 +554,7 @@ def test_alias(alias, target):
         args_kwargs_fn=lambda info: info.sample_inputs_fn(),
     ),
 )
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 def test_convert_dtype_image_tensor_dtype_and_device(info, args_kwargs, device):
     (input, *other_args), kwargs = args_kwargs.load(device)
     dtype = other_args[0] if other_args else kwargs.get("dtype", torch.float32)
@@ -564,7 +565,7 @@ def test_convert_dtype_image_tensor_dtype_and_device(info, args_kwargs, device):
     assert output.device == input.device
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
+@pytest.mark.parametrize("device", cpu_and_cuda())
 @pytest.mark.parametrize("num_channels", [1, 3])
 def test_normalize_image_tensor_stats(device, num_channels):
     stats = pytest.importorskip("scipy.stats", reason="SciPy is not available")
@@ -664,235 +665,7 @@ def _compute_affine_matrix(angle_, translate_, scale_, shear_, center_):
     return true_matrix
 
 
-@pytest.mark.parametrize("device", cpu_and_gpu())
-def test_correctness_affine_bounding_box_on_fixed_input(device):
-    # Check transformation against known expected output
-    format = datapoints.BoundingBoxFormat.XYXY
-    spatial_size = (64, 64)
-    in_boxes = [
-        [20, 25, 35, 45],
-        [50, 5, 70, 22],
-        [spatial_size[1] // 2 - 10, spatial_size[0] // 2 - 10, spatial_size[1] // 2 + 10, spatial_size[0] // 2 + 10],
-        [1, 1, 5, 5],
-    ]
-    in_boxes = torch.tensor(in_boxes, dtype=torch.float64, device=device)
-    # Tested parameters
-    angle = 63
-    scale = 0.89
-    dx = 0.12
-    dy = 0.23
-
-    # Expected bboxes computed using albumentations:
-    # from albumentations.augmentations.geometric.functional import bbox_shift_scale_rotate
-    # from albumentations.augmentations.geometric.functional import normalize_bbox, denormalize_bbox
-    # expected_bboxes = []
-    # for in_box in in_boxes:
-    #     n_in_box = normalize_bbox(in_box, *spatial_size)
-    #     n_out_box = bbox_shift_scale_rotate(n_in_box, -angle, scale, dx, dy, *spatial_size)
-    #     out_box = denormalize_bbox(n_out_box, *spatial_size)
-    #     expected_bboxes.append(out_box)
-    expected_bboxes = [
-        (24.522435977922218, 34.375689508290854, 46.443125279998114, 54.3516575015695),
-        (54.88288587110401, 50.08453280875634, 76.44484547743795, 72.81332520036864),
-        (27.709526487041554, 34.74952648704156, 51.650473512958435, 58.69047351295844),
-        (48.56528888843238, 9.611532109828834, 53.35347829361575, 14.39972151501221),
-    ]
-
-    expected_bboxes = clamp_bounding_box(
-        datapoints.BoundingBox(expected_bboxes, format="XYXY", spatial_size=spatial_size)
-    ).tolist()
-
-    output_boxes = F.affine_bounding_box(
-        in_boxes,
-        format=format,
-        spatial_size=spatial_size,
-        angle=angle,
-        translate=(dx * spatial_size[1], dy * spatial_size[0]),
-        scale=scale,
-        shear=(0, 0),
-    )
-
-    torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
-
-
-@pytest.mark.parametrize("device", cpu_and_gpu())
-def test_correctness_affine_segmentation_mask_on_fixed_input(device):
-    # Check transformation against known expected output and CPU/CUDA devices
-
-    # Create a fixed input segmentation mask with 2 square masks
-    # in top-left, bottom-left corners
-    mask = torch.zeros(1, 32, 32, dtype=torch.long, device=device)
-    mask[0, 2:10, 2:10] = 1
-    mask[0, 32 - 9 : 32 - 3, 3:9] = 2
-
-    # Rotate 90 degrees and scale
-    expected_mask = torch.rot90(mask, k=-1, dims=(-2, -1))
-    expected_mask = torch.nn.functional.interpolate(expected_mask[None, :].float(), size=(64, 64), mode="nearest")
-    expected_mask = expected_mask[0, :, 16 : 64 - 16, 16 : 64 - 16].long()
-
-    out_mask = F.affine_mask(mask, 90, [0.0, 0.0], 64.0 / 32.0, [0.0, 0.0])
-
-    torch.testing.assert_close(out_mask, expected_mask)
-
-
-@pytest.mark.parametrize("angle", range(-90, 90, 56))
-@pytest.mark.parametrize("expand, center", [(True, None), (False, None), (False, (12, 14))])
-def test_correctness_rotate_bounding_box(angle, expand, center):
-    def _compute_expected_bbox(bbox, angle_, expand_, center_):
-        affine_matrix = _compute_affine_matrix(angle_, [0.0, 0.0], 1.0, [0.0, 0.0], center_)
-        affine_matrix = affine_matrix[:2, :]
-
-        height, width = bbox.spatial_size
-        bbox_xyxy = convert_format_bounding_box(bbox, new_format=datapoints.BoundingBoxFormat.XYXY)
-        points = np.array(
-            [
-                [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0],
-                [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0],
-                [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0],
-                [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0],
-                # image frame
-                [0.0, 0.0, 1.0],
-                [0.0, height, 1.0],
-                [width, height, 1.0],
-                [width, 0.0, 1.0],
-            ]
-        )
-        transformed_points = np.matmul(points, affine_matrix.T)
-        out_bbox = [
-            float(np.min(transformed_points[:4, 0])),
-            float(np.min(transformed_points[:4, 1])),
-            float(np.max(transformed_points[:4, 0])),
-            float(np.max(transformed_points[:4, 1])),
-        ]
-        if expand_:
-            tr_x = np.min(transformed_points[4:, 0])
-            tr_y = np.min(transformed_points[4:, 1])
-            out_bbox[0] -= tr_x
-            out_bbox[1] -= tr_y
-            out_bbox[2] -= tr_x
-            out_bbox[3] -= tr_y
-
-            height = int(height - 2 * tr_y)
-            width = int(width - 2 * tr_x)
-
-        out_bbox = datapoints.BoundingBox(
-            out_bbox,
-            format=datapoints.BoundingBoxFormat.XYXY,
-            spatial_size=(height, width),
-            dtype=bbox.dtype,
-            device=bbox.device,
-        )
-        out_bbox = clamp_bounding_box(convert_format_bounding_box(out_bbox, new_format=bbox.format))
-        return out_bbox, (height, width)
-
-    spatial_size = (32, 38)
-
-    for bboxes in make_bounding_boxes(spatial_size=spatial_size, extra_dims=((4,),)):
-        bboxes_format = bboxes.format
-        bboxes_spatial_size = bboxes.spatial_size
-
-        output_bboxes, output_spatial_size = F.rotate_bounding_box(
-            bboxes.as_subclass(torch.Tensor),
-            format=bboxes_format,
-            spatial_size=bboxes_spatial_size,
-            angle=angle,
-            expand=expand,
-            center=center,
-        )
-
-        center_ = center
-        if center_ is None:
-            center_ = [s * 0.5 for s in bboxes_spatial_size[::-1]]
-
-        if bboxes.ndim < 2:
-            bboxes = [bboxes]
-
-        expected_bboxes = []
-        for bbox in bboxes:
-            bbox = datapoints.BoundingBox(bbox, format=bboxes_format, spatial_size=bboxes_spatial_size)
-            expected_bbox, expected_spatial_size = _compute_expected_bbox(bbox, -angle, expand, center_)
-            expected_bboxes.append(expected_bbox)
-        if len(expected_bboxes) > 1:
-            expected_bboxes = torch.stack(expected_bboxes)
-        else:
-            expected_bboxes = expected_bboxes[0]
-        torch.testing.assert_close(output_bboxes, expected_bboxes, atol=1, rtol=0)
-        torch.testing.assert_close(output_spatial_size, expected_spatial_size, atol=1, rtol=0)
-
-
-@pytest.mark.parametrize("device", cpu_and_gpu())
-@pytest.mark.parametrize("expand", [False])  # expand=True does not match D2
-def test_correctness_rotate_bounding_box_on_fixed_input(device, expand):
-    # Check transformation against known expected output
-    format = datapoints.BoundingBoxFormat.XYXY
-    spatial_size = (64, 64)
-    # xyxy format
-    in_boxes = [
-        [1, 1, 5, 5],
-        [1, spatial_size[0] - 6, 5, spatial_size[0] - 2],
-        [spatial_size[1] - 6, spatial_size[0] - 6, spatial_size[1] - 2, spatial_size[0] - 2],
-        [spatial_size[1] // 2 - 10, spatial_size[0] // 2 - 10, spatial_size[1] // 2 + 10, spatial_size[0] // 2 + 10],
-    ]
-    in_boxes = torch.tensor(in_boxes, dtype=torch.float64, device=device)
-    # Tested parameters
-    angle = 45
-    center = None if expand else [12, 23]
-
-    # # Expected bboxes computed using Detectron2:
-    # from detectron2.data.transforms import RotationTransform, AugmentationList
-    # from detectron2.data.transforms import AugInput
-    # import cv2
-    # inpt = AugInput(im1, boxes=np.array(in_boxes, dtype="float32"))
-    # augs = AugmentationList([RotationTransform(*size, angle, expand=expand, center=center, interp=cv2.INTER_NEAREST), ])
-    # out = augs(inpt)
-    # print(inpt.boxes)
-    if expand:
-        expected_bboxes = [
-            [1.65937957, 42.67157288, 7.31623382, 48.32842712],
-            [41.96446609, 82.9766594, 47.62132034, 88.63351365],
-            [82.26955262, 42.67157288, 87.92640687, 48.32842712],
-            [31.35786438, 31.35786438, 59.64213562, 59.64213562],
-        ]
-    else:
-        expected_bboxes = [
-            [-11.33452378, 12.39339828, -5.67766953, 18.05025253],
-            [28.97056275, 52.69848481, 34.627417, 58.35533906],
-            [69.27564928, 12.39339828, 74.93250353, 18.05025253],
-            [18.36396103, 1.07968978, 46.64823228, 29.36396103],
-        ]
-    expected_bboxes = clamp_bounding_box(
-        datapoints.BoundingBox(expected_bboxes, format="XYXY", spatial_size=spatial_size)
-    ).tolist()
-
-    output_boxes, _ = F.rotate_bounding_box(
-        in_boxes,
-        format=format,
-        spatial_size=spatial_size,
-        angle=angle,
-        expand=expand,
-        center=center,
-    )
-
-    torch.testing.assert_close(output_boxes.tolist(), expected_bboxes)
-
-
-@pytest.mark.parametrize("device", cpu_and_gpu())
-def test_correctness_rotate_segmentation_mask_on_fixed_input(device):
-    # Check transformation against known expected output and CPU/CUDA devices
-
-    # Create a fixed input segmentation mask with 2 square masks
-    # in top-left, bottom-left corners
-    mask = torch.zeros(1, 32, 32, dtype=torch.long, device=device)
-    mask[0, 2:10, 2:10] = 1
-    mask[0, 32 - 9 : 32 - 3, 3:9] = 2
-
-    # Rotate 90 degrees
-    expected_mask = torch.rot90(mask, k=1, dims=(-2, -1))
-    out_mask = F.rotate_mask(mask, 90, expand=False)
-    torch.testing.assert_close(out_mask, expected_mask)
-
-
-@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "format", [datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH, datapoints.BoundingBoxFormat.CXCYWH], @@ -949,19 +722,7 @@ def test_correctness_crop_bounding_box(device, format, top, left, height, width, torch.testing.assert_close(output_spatial_size, spatial_size) -@pytest.mark.parametrize("device", cpu_and_gpu()) -def test_correctness_horizontal_flip_segmentation_mask_on_fixed_input(device): - mask = torch.zeros((3, 3, 3), dtype=torch.long, device=device) - mask[:, :, 0] = 1 - - out_mask = F.horizontal_flip_mask(mask) - - expected_mask = torch.zeros((3, 3, 3), dtype=torch.long, device=device) - expected_mask[:, :, -1] = 1 - torch.testing.assert_close(out_mask, expected_mask) - - -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device): mask = torch.zeros((3, 3, 3), dtype=torch.long, device=device) mask[:, 0, :] = 1 @@ -973,7 +734,7 @@ def test_correctness_vertical_flip_segmentation_mask_on_fixed_input(device): torch.testing.assert_close(out_mask, expected_mask) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "format", [datapoints.BoundingBoxFormat.XYXY, datapoints.BoundingBoxFormat.XYWH, datapoints.BoundingBoxFormat.CXCYWH], @@ -1032,7 +793,7 @@ def _parse_padding(padding): return padding -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("padding", [[1], [1, 1], [1, 1, 2, 2]]) def test_correctness_pad_bounding_box(device, padding): def _compute_expected_bbox(bbox, padding_): @@ -1087,7 +848,7 @@ def _compute_expected_spatial_size(bbox, padding_): torch.testing.assert_close(output_boxes, expected_bboxes, atol=1, rtol=0) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_correctness_pad_segmentation_mask_on_fixed_input(device): mask = torch.ones((1, 3, 3), dtype=torch.long, device=device) @@ -1098,7 +859,7 @@ def test_correctness_pad_segmentation_mask_on_fixed_input(device): torch.testing.assert_close(out_mask, expected_mask) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "startpoints, endpoints", [ @@ -1182,7 +943,7 @@ def _compute_expected_bbox(bbox, pcoeffs_): torch.testing.assert_close(output_bboxes, expected_bboxes, rtol=0, atol=1) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize( "output_size", [(18, 18), [18, 15], (16, 19), [12], [46, 48]], @@ -1236,7 +997,7 @@ def _compute_expected_bbox(bbox, output_size_): torch.testing.assert_close(output_spatial_size, output_size) -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("output_size", [[4, 2], [4], [7, 6]]) def test_correctness_center_crop_mask(device, output_size): def _compute_expected_mask(mask, output_size): @@ -1260,7 +1021,7 @@ def _compute_expected_mask(mask, output_size): # Copied from test/test_functional_tensor.py -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) @pytest.mark.parametrize("spatial_size", ("small", "large")) @pytest.mark.parametrize("dt", [None, 
torch.float32, torch.float64, torch.float16]) @pytest.mark.parametrize("ksize", [(3, 3), [3, 5], (23, 23)]) @@ -1357,7 +1118,7 @@ def test_equalize_image_tensor_edge_cases(): assert output.unique().tolist() == [0, 255] -@pytest.mark.parametrize("device", cpu_and_gpu()) +@pytest.mark.parametrize("device", cpu_and_cuda()) def test_correctness_uniform_temporal_subsample(device): video = torch.arange(10, device=device)[:, None, None, None].expand(-1, 3, 8, 8) out_video = F.uniform_temporal_subsample(video, 5) diff --git a/test/test_transforms_v2_refactored.py b/test/test_transforms_v2_refactored.py new file mode 100644 index 00000000000..69180b99dbc --- /dev/null +++ b/test/test_transforms_v2_refactored.py @@ -0,0 +1,1636 @@ +import contextlib +import inspect +import math +import re +from typing import get_type_hints +from unittest import mock + +import numpy as np +import PIL.Image +import pytest + +import torch +import torchvision.transforms.v2 as transforms +from common_utils import ( + assert_equal, + assert_no_warnings, + cache, + cpu_and_cuda, + ignore_jit_no_profile_information_warning, + make_bounding_box, + make_detection_mask, + make_image, + make_image_pil, + make_image_tensor, + make_segmentation_mask, + make_video, + set_rng_seed, +) +from torch.testing import assert_close +from torchvision import datapoints + +from torchvision.transforms._functional_tensor import _max_value as get_max_value +from torchvision.transforms.functional import pil_modes_mapping +from torchvision.transforms.v2 import functional as F + + +@pytest.fixture(autouse=True) +def fix_rng_seed(): + set_rng_seed(0) + yield + + +def _to_tolerances(maybe_tolerance_dict): + if not isinstance(maybe_tolerance_dict, dict): + return dict(rtol=None, atol=None) + + tolerances = dict(rtol=0, atol=0) + tolerances.update(maybe_tolerance_dict) + return tolerances + + +def _check_kernel_cuda_vs_cpu(kernel, input, *args, rtol, atol, **kwargs): + """Checks if the kernel produces closes results for inputs on GPU and CPU.""" + if input.device.type != "cuda": + return + + input_cuda = input.as_subclass(torch.Tensor) + input_cpu = input_cuda.to("cpu") + + actual = kernel(input_cuda, *args, **kwargs) + expected = kernel(input_cpu, *args, **kwargs) + + assert_close(actual, expected, check_device=False, rtol=rtol, atol=atol) + + +@cache +def _script(fn): + try: + return torch.jit.script(fn) + except Exception as error: + raise AssertionError(f"Trying to `torch.jit.script` '{fn.__name__}' raised the error above.") from error + + +def _check_kernel_scripted_vs_eager(kernel, input, *args, rtol, atol, **kwargs): + """Checks if the kernel is scriptable and if the scripted output is close to the eager one.""" + if input.device.type != "cpu": + return + + kernel_scripted = _script(kernel) + + input = input.as_subclass(torch.Tensor) + with ignore_jit_no_profile_information_warning(): + actual = kernel_scripted(input, *args, **kwargs) + expected = kernel(input, *args, **kwargs) + + assert_close(actual, expected, rtol=rtol, atol=atol) + + +def _check_kernel_batched_vs_unbatched(kernel, input, *args, rtol, atol, **kwargs): + """Checks if the kernel produces close results for batched and unbatched inputs.""" + unbatched_input = input.as_subclass(torch.Tensor) + + for batch_dims in [(2,), (2, 1)]: + repeats = [*batch_dims, *[1] * input.ndim] + + actual = kernel(unbatched_input.repeat(repeats), *args, **kwargs) + + expected = kernel(unbatched_input, *args, **kwargs) + # We can't directly call `.repeat()` on the output, since some kernel also return 
some additional metadata + if isinstance(expected, torch.Tensor): + expected = expected.repeat(repeats) + else: + tensor, *metadata = expected + expected = (tensor.repeat(repeats), *metadata) + + assert_close(actual, expected, rtol=rtol, atol=atol) + + for degenerate_batch_dims in [(0,), (5, 0), (0, 5)]: + degenerate_batched_input = torch.empty( + degenerate_batch_dims + input.shape, dtype=input.dtype, device=input.device + ) + + output = kernel(degenerate_batched_input, *args, **kwargs) + # Most kernels just return a tensor, but some also return some additional metadata + if not isinstance(output, torch.Tensor): + output, *_ = output + + assert output.shape[: -input.ndim] == degenerate_batch_dims + + +def check_kernel( + kernel, + input, + *args, + check_cuda_vs_cpu=True, + check_scripted_vs_eager=True, + check_batched_vs_unbatched=True, + **kwargs, +): + initial_input_version = input._version + + output = kernel(input.as_subclass(torch.Tensor), *args, **kwargs) + # Most kernels just return a tensor, but some also return some additional metadata + if not isinstance(output, torch.Tensor): + output, *_ = output + + # check that no inplace operation happened + assert input._version == initial_input_version + + assert output.dtype == input.dtype + assert output.device == input.device + + if check_cuda_vs_cpu: + _check_kernel_cuda_vs_cpu(kernel, input, *args, **kwargs, **_to_tolerances(check_cuda_vs_cpu)) + + if check_scripted_vs_eager: + _check_kernel_scripted_vs_eager(kernel, input, *args, **kwargs, **_to_tolerances(check_scripted_vs_eager)) + + if check_batched_vs_unbatched: + _check_kernel_batched_vs_unbatched(kernel, input, *args, **kwargs, **_to_tolerances(check_batched_vs_unbatched)) + + +def _check_dispatcher_scripted_smoke(dispatcher, input, *args, **kwargs): + """Checks if the dispatcher can be scripted and the scripted version can be called without error.""" + if not isinstance(input, datapoints.Image): + return + + dispatcher_scripted = _script(dispatcher) + with ignore_jit_no_profile_information_warning(): + dispatcher_scripted(input.as_subclass(torch.Tensor), *args, **kwargs) + + +def _check_dispatcher_dispatch(dispatcher, kernel, input, *args, **kwargs): + """Checks if the dispatcher correctly dispatches the input to the corresponding kernel and that the input type is + preserved in doing so. For bounding boxes also checks that the format is preserved. + """ + if isinstance(input, datapoints._datapoint.Datapoint): + # Due to our complex dispatch architecture for datapoints, we cannot spy on the kernel directly, + # but rather have to patch the `Datapoint.__F` attribute to contain the spied on kernel. + spy = mock.MagicMock(wraps=kernel, name=kernel.__name__) + with mock.patch.object(F, kernel.__name__, spy): + # Due to Python's name mangling, the `Datapoint.__F` attribute is only accessible from inside the class. 
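+            # A minimal illustration of the mangling (hypothetical `Widget` class, not part of this test):
+            #
+            #     class Widget:
+            #         __F = "functional"  # compiled to the attribute name `_Widget__F`
+            #
+            #     Widget._Widget__F  # -> "functional"
+            #     Widget.__F         # -> AttributeError, `__F` is not mangled outside the class body
+            #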
+ # Since that is not the case here, we need to prefix f"_{cls.__name__}" + # See https://docs.python.org/3/tutorial/classes.html#private-variables for details + with mock.patch.object(datapoints._datapoint.Datapoint, "_Datapoint__F", new=F): + output = dispatcher(input, *args, **kwargs) + + spy.assert_called_once() + else: + with mock.patch(f"{dispatcher.__module__}.{kernel.__name__}", wraps=kernel) as spy: + output = dispatcher(input, *args, **kwargs) + + spy.assert_called_once() + + assert isinstance(output, type(input)) + + if isinstance(input, datapoints.BoundingBox): + assert output.format == input.format + + +def check_dispatcher( + dispatcher, + kernel, + input, + *args, + check_scripted_smoke=True, + check_dispatch=True, + **kwargs, +): + with mock.patch("torch._C._log_api_usage_once", wraps=torch._C._log_api_usage_once) as spy: + dispatcher(input, *args, **kwargs) + + spy.assert_any_call(f"{dispatcher.__module__}.{dispatcher.__name__}") + + unknown_input = object() + with pytest.raises(TypeError, match=re.escape(str(type(unknown_input)))): + dispatcher(unknown_input, *args, **kwargs) + + if check_scripted_smoke: + _check_dispatcher_scripted_smoke(dispatcher, input, *args, **kwargs) + + if check_dispatch: + _check_dispatcher_dispatch(dispatcher, kernel, input, *args, **kwargs) + + +def _check_dispatcher_kernel_signature_match(dispatcher, *, kernel, input_type): + """Checks if the signature of the dispatcher matches the kernel signature.""" + dispatcher_signature = inspect.signature(dispatcher) + dispatcher_params = list(dispatcher_signature.parameters.values())[1:] + + kernel_signature = inspect.signature(kernel) + kernel_params = list(kernel_signature.parameters.values())[1:] + + if issubclass(input_type, datapoints._datapoint.Datapoint): + # We filter out metadata that is implicitly passed to the dispatcher through the input datapoint, but has to be + # explicitly passed to the kernel. + kernel_params = [param for param in kernel_params if param.name not in input_type.__annotations__.keys()] + + dispatcher_params = iter(dispatcher_params) + for dispatcher_param, kernel_param in zip(dispatcher_params, kernel_params): + try: + # In general, the dispatcher parameters are a superset of the kernel parameters. Thus, we filter out + # dispatcher parameters that have no kernel equivalent while keeping the order intact. + while dispatcher_param.name != kernel_param.name: + dispatcher_param = next(dispatcher_params) + except StopIteration: + raise AssertionError( + f"Parameter `{kernel_param.name}` of kernel `{kernel.__name__}` " + f"has no corresponding parameter on the dispatcher `{dispatcher.__name__}`." + ) from None + + if issubclass(input_type, PIL.Image.Image): + # PIL kernels often have more correct annotations, since they are not limited by JIT. Thus, we don't check + # them in the first place. 
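+            # As an illustration (hypothetical parameters, not part of this test):
+            # `inspect.Parameter` equality compares name, kind, default, *and* annotation,
+            # so clearing the annotations below is what makes the comparison ignore them:
+            #
+            #     kind = inspect.Parameter.POSITIONAL_OR_KEYWORD
+            #     p1 = inspect.Parameter("size", kind, annotation=int)
+            #     p2 = inspect.Parameter("size", kind)
+            #     p1 == p2                                               # False
+            #     p1.replace(annotation=inspect.Parameter.empty) == p2   # True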
+ dispatcher_param._annotation = kernel_param._annotation = inspect.Parameter.empty + + assert dispatcher_param == kernel_param + + +def _check_dispatcher_datapoint_signature_match(dispatcher): + """Checks if the signature of the dispatcher matches the corresponding method signature on the Datapoint class.""" + dispatcher_signature = inspect.signature(dispatcher) + dispatcher_params = list(dispatcher_signature.parameters.values())[1:] + + datapoint_method = getattr(datapoints._datapoint.Datapoint, dispatcher.__name__) + datapoint_signature = inspect.signature(datapoint_method) + datapoint_params = list(datapoint_signature.parameters.values())[1:] + + # Some annotations in the `datapoints._datapoint` module + # are stored as strings. The block below makes them concrete again (non-strings), so they can be compared to the + # natively concrete dispatcher annotations. + datapoint_annotations = get_type_hints(datapoint_method) + for param in datapoint_params: + param._annotation = datapoint_annotations[param.name] + + assert dispatcher_params == datapoint_params + + +def check_dispatcher_signatures_match(dispatcher, *, kernel, input_type): + _check_dispatcher_kernel_signature_match(dispatcher, kernel=kernel, input_type=input_type) + _check_dispatcher_datapoint_signature_match(dispatcher) + + +def _check_transform_v1_compatibility(transform, input): + """If the transform defines the ``_v1_transform_cls`` attribute, checks if the transform has a public, static + ``get_params`` method, is scriptable, and the scripted version can be called without error.""" + if not hasattr(transform, "_v1_transform_cls"): + return + + if type(input) is not torch.Tensor: + return + + if hasattr(transform._v1_transform_cls, "get_params"): + assert type(transform).get_params is transform._v1_transform_cls.get_params + + scripted_transform = _script(transform) + with ignore_jit_no_profile_information_warning(): + scripted_transform(input) + + +def check_transform(transform_cls, input, *args, **kwargs): + transform = transform_cls(*args, **kwargs) + + output = transform(input) + assert isinstance(output, type(input)) + + if isinstance(input, datapoints.BoundingBox): + assert output.format == input.format + + _check_transform_v1_compatibility(transform, input) + + +def transform_cls_to_functional(transform_cls, **transform_specific_kwargs): + def wrapper(input, *args, **kwargs): + transform = transform_cls(*args, **transform_specific_kwargs, **kwargs) + return transform(input) + + wrapper.__name__ = transform_cls.__name__ + + return wrapper + + +def param_value_parametrization(**kwargs): + """Helper function to turn + + @pytest.mark.parametrize( + ("param", "value"), + ("a", 1), + ("a", 2), + ("a", 3), + ("b", -1.0) + ("b", 1.0) + ) + + into + + @param_value_parametrization(a=[1, 2, 3], b=[-1.0, 1.0]) + """ + return pytest.mark.parametrize( + ("param", "value"), + [(param, value) for param, values in kwargs.items() for value in values], + ) + + +def adapt_fill(value, *, dtype): + """Adapt fill values in the range [0.0, 1.0] to the value range of the dtype""" + if value is None: + return value + + max_value = get_max_value(dtype) + + if isinstance(value, (int, float)): + return type(value)(value * max_value) + elif isinstance(value, (list, tuple)): + return type(value)(type(v)(v * max_value) for v in value) + else: + raise ValueError(f"fill should be an int or float, or a list or tuple of the former, but got '{value}'.") + + +EXHAUSTIVE_TYPE_FILLS = [ + None, + 1, + 0.5, + [1], + [0.2], + (0,), + (0.7,), + [1, 0, 1], + 
[0.1, 0.2, 0.3], + (0, 1, 0), + (0.9, 0.234, 0.314), +] +CORRECTNESS_FILLS = [ + v for v in EXHAUSTIVE_TYPE_FILLS if v is None or isinstance(v, float) or (isinstance(v, list) and len(v) > 1) +] + + +# We cannot use `list(transforms.InterpolationMode)` here, since it includes some PIL-only ones as well +INTERPOLATION_MODES = [ + transforms.InterpolationMode.NEAREST, + transforms.InterpolationMode.NEAREST_EXACT, + transforms.InterpolationMode.BILINEAR, + transforms.InterpolationMode.BICUBIC, +] + + +@contextlib.contextmanager +def assert_warns_antialias_default_value(): + with pytest.warns(UserWarning, match="The default value of the antialias parameter of all the resizing transforms"): + yield + + +def reference_affine_bounding_box_helper(bounding_box, *, format, spatial_size, affine_matrix): + def transform(bbox): + # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1 + in_dtype = bbox.dtype + if not torch.is_floating_point(bbox): + bbox = bbox.float() + bbox_xyxy = F.convert_format_bounding_box( + bbox.as_subclass(torch.Tensor), + old_format=format, + new_format=datapoints.BoundingBoxFormat.XYXY, + inplace=True, + ) + points = np.array( + [ + [bbox_xyxy[0].item(), bbox_xyxy[1].item(), 1.0], + [bbox_xyxy[2].item(), bbox_xyxy[1].item(), 1.0], + [bbox_xyxy[0].item(), bbox_xyxy[3].item(), 1.0], + [bbox_xyxy[2].item(), bbox_xyxy[3].item(), 1.0], + ] + ) + transformed_points = np.matmul(points, affine_matrix.T) + out_bbox = torch.tensor( + [ + np.min(transformed_points[:, 0]).item(), + np.min(transformed_points[:, 1]).item(), + np.max(transformed_points[:, 0]).item(), + np.max(transformed_points[:, 1]).item(), + ], + dtype=bbox_xyxy.dtype, + ) + out_bbox = F.convert_format_bounding_box( + out_bbox, old_format=datapoints.BoundingBoxFormat.XYXY, new_format=format, inplace=True + ) + # It is important to clamp before casting, especially for CXCYWH format, dtype=int64 + out_bbox = F.clamp_bounding_box(out_bbox, format=format, spatial_size=spatial_size) + out_bbox = out_bbox.to(dtype=in_dtype) + return out_bbox + + return torch.stack([transform(b) for b in bounding_box.reshape(-1, 4).unbind()]).reshape(bounding_box.shape) + + +class TestResize: + INPUT_SIZE = (17, 11) + OUTPUT_SIZES = [17, [17], (17,), [12, 13], (12, 13)] + + def _make_max_size_kwarg(self, *, use_max_size, size): + if use_max_size: + if not (isinstance(size, int) or len(size) == 1): + # This would result in an `ValueError` + return None + + max_size = (size if isinstance(size, int) else size[0]) + 1 + else: + max_size = None + + return dict(max_size=max_size) + + def _compute_output_size(self, *, input_size, size, max_size): + if not (isinstance(size, int) or len(size) == 1): + return tuple(size) + + if not isinstance(size, int): + size = size[0] + + old_height, old_width = input_size + ratio = old_width / old_height + if ratio > 1: + new_height = size + new_width = int(ratio * new_height) + else: + new_width = size + new_height = int(new_width / ratio) + + if max_size is not None and max(new_height, new_width) > max_size: + # Need to recompute the aspect ratio, since it might have changed due to rounding + ratio = new_width / new_height + if ratio > 1: + new_width = max_size + new_height = int(new_width / ratio) + else: + new_height = max_size + new_width = int(new_height * ratio) + + return new_height, new_width + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) + @pytest.mark.parametrize("use_max_size", [True, False]) + 
@pytest.mark.parametrize("antialias", [True, False])
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_image_tensor(self, size, interpolation, use_max_size, antialias, dtype, device):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return
+
+        # In contrast to CPU, there is no native `InterpolationMode.BICUBIC` implementation for uint8 images on CUDA.
+        # Internally, it uses the float path. Thus, we need to test with an enormous tolerance here to account for that.
+        atol = 30 if interpolation is transforms.InterpolationMode.BICUBIC and dtype is torch.uint8 else 1
+        check_cuda_vs_cpu_tolerances = dict(rtol=0, atol=atol / 255 if dtype.is_floating_point else atol)
+
+        check_kernel(
+            F.resize_image_tensor,
+            make_image(self.INPUT_SIZE, dtype=dtype, device=device),
+            size=size,
+            interpolation=interpolation,
+            **max_size_kwarg,
+            antialias=antialias,
+            check_cuda_vs_cpu=check_cuda_vs_cpu_tolerances,
+            check_scripted_vs_eager=not isinstance(size, int),
+        )
+
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("use_max_size", [True, False])
+    @pytest.mark.parametrize("dtype", [torch.float32, torch.int64])
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_kernel_bounding_box(self, format, size, use_max_size, dtype, device):
+        if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)):
+            return
+
+        bounding_box = make_bounding_box(
+            format=format,
+            spatial_size=self.INPUT_SIZE,
+            dtype=dtype,
+            device=device,
+        )
+        check_kernel(
+            F.resize_bounding_box,
+            bounding_box,
+            spatial_size=bounding_box.spatial_size,
+            size=size,
+            **max_size_kwarg,
+            check_scripted_vs_eager=not isinstance(size, int),
+        )
+
+    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask])
+    def test_kernel_mask(self, make_mask):
+        check_kernel(F.resize_mask, make_mask(self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1])
+
+    def test_kernel_video(self):
+        check_kernel(F.resize_video, make_video(self.INPUT_SIZE), size=self.OUTPUT_SIZES[-1], antialias=True)
+
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize(
+        ("kernel", "make_input"),
+        [
+            (F.resize_image_tensor, make_image_tensor),
+            (F.resize_image_pil, make_image_pil),
+            (F.resize_image_tensor, make_image),
+            (F.resize_bounding_box, make_bounding_box),
+            (F.resize_mask, make_segmentation_mask),
+            (F.resize_video, make_video),
+        ],
+    )
+    def test_dispatcher(self, size, kernel, make_input):
+        check_dispatcher(
+            F.resize,
+            kernel,
+            make_input(self.INPUT_SIZE),
+            size=size,
+            antialias=True,
+            check_scripted_smoke=not isinstance(size, int),
+        )
+
+    @pytest.mark.parametrize(
+        ("kernel", "input_type"),
+        [
+            (F.resize_image_tensor, torch.Tensor),
+            (F.resize_image_pil, PIL.Image.Image),
+            (F.resize_image_tensor, datapoints.Image),
+            (F.resize_bounding_box, datapoints.BoundingBox),
+            (F.resize_mask, datapoints.Mask),
+            (F.resize_video, datapoints.Video),
+        ],
+    )
+    def test_dispatcher_signature(self, kernel, input_type):
+        check_dispatcher_signatures_match(F.resize, kernel=kernel, input_type=input_type)
+
+    @pytest.mark.parametrize("size", OUTPUT_SIZES)
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    @pytest.mark.parametrize(
+        "make_input",
+        [
+            make_image_tensor,
+            make_image_pil,
+            make_image,
+            make_bounding_box,
+            make_segmentation_mask,
+            make_detection_mask,
+
make_video, + ], + ) + def test_transform(self, size, device, make_input): + check_transform(transforms.Resize, make_input(self.INPUT_SIZE, device=device), size=size, antialias=True) + + def _check_output_size(self, input, output, *, size, max_size): + assert tuple(F.get_spatial_size(output)) == self._compute_output_size( + input_size=F.get_spatial_size(input), size=size, max_size=max_size + ) + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + # `InterpolationMode.NEAREST` is modeled after the buggy `INTER_NEAREST` interpolation of CV2. + # The PIL equivalent of `InterpolationMode.NEAREST` is `InterpolationMode.NEAREST_EXACT` + @pytest.mark.parametrize("interpolation", set(INTERPOLATION_MODES) - {transforms.InterpolationMode.NEAREST}) + @pytest.mark.parametrize("use_max_size", [True, False]) + @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)]) + def test_image_correctness(self, size, interpolation, use_max_size, fn): + if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): + return + + image = make_image(self.INPUT_SIZE, dtype=torch.uint8) + + actual = fn(image, size=size, interpolation=interpolation, **max_size_kwarg, antialias=True) + expected = F.to_image_tensor( + F.resize(F.to_image_pil(image), size=size, interpolation=interpolation, **max_size_kwarg) + ) + + self._check_output_size(image, actual, size=size, **max_size_kwarg) + torch.testing.assert_close(actual, expected, atol=1, rtol=0) + + def _reference_resize_bounding_box(self, bounding_box, *, size, max_size=None): + old_height, old_width = bounding_box.spatial_size + new_height, new_width = self._compute_output_size( + input_size=bounding_box.spatial_size, size=size, max_size=max_size + ) + + if (old_height, old_width) == (new_height, new_width): + return bounding_box + + affine_matrix = np.array( + [ + [new_width / old_width, 0, 0], + [0, new_height / old_height, 0], + ], + dtype="float64" if bounding_box.dtype == torch.float64 else "float32", + ) + + expected_bboxes = reference_affine_bounding_box_helper( + bounding_box, + format=bounding_box.format, + spatial_size=(new_height, new_width), + affine_matrix=affine_matrix, + ) + return datapoints.BoundingBox.wrap_like(bounding_box, expected_bboxes, spatial_size=(new_height, new_width)) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize("use_max_size", [True, False]) + @pytest.mark.parametrize("fn", [F.resize, transform_cls_to_functional(transforms.Resize)]) + def test_bounding_box_correctness(self, format, size, use_max_size, fn): + if not (max_size_kwarg := self._make_max_size_kwarg(use_max_size=use_max_size, size=size)): + return + + bounding_box = make_bounding_box(format=format, spatial_size=self.INPUT_SIZE) + + actual = fn(bounding_box, size=size, **max_size_kwarg) + expected = self._reference_resize_bounding_box(bounding_box, size=size, **max_size_kwarg) + + self._check_output_size(bounding_box, actual, size=size, **max_size_kwarg) + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize("interpolation", set(transforms.InterpolationMode) - set(INTERPOLATION_MODES)) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + def test_pil_interpolation_compat_smoke(self, interpolation, make_input): + input = make_input(self.INPUT_SIZE) + + with ( + contextlib.nullcontext() + if isinstance(input, PIL.Image.Image) + # This error is 
triggered in PyTorch core + else pytest.raises(NotImplementedError, match=f"got {interpolation.value.lower()}") + ): + F.resize( + input, + size=self.OUTPUT_SIZES[0], + interpolation=interpolation, + ) + + def test_dispatcher_pil_antialias_warning(self): + with pytest.warns(UserWarning, match="Anti-alias option is always applied for PIL Image input"): + F.resize(make_image_pil(self.INPUT_SIZE), size=self.OUTPUT_SIZES[0], antialias=False) + + @pytest.mark.parametrize("size", OUTPUT_SIZES) + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], + ) + def test_max_size_error(self, size, make_input): + if isinstance(size, int) or len(size) == 1: + max_size = (size if isinstance(size, int) else size[0]) - 1 + match = "must be strictly greater than the requested size" + else: + # value can be anything other than None + max_size = -1 + match = "size should be an int or a sequence of length 1" + + with pytest.raises(ValueError, match=match): + F.resize(make_input(self.INPUT_SIZE), size=size, max_size=max_size, antialias=True) + + @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image, make_video], + ) + def test_antialias_warning(self, interpolation, make_input): + with ( + assert_warns_antialias_default_value() + if interpolation in {transforms.InterpolationMode.BILINEAR, transforms.InterpolationMode.BICUBIC} + else assert_no_warnings() + ): + F.resize( + make_input(self.INPUT_SIZE), + size=self.OUTPUT_SIZES[0], + interpolation=interpolation, + ) + + @pytest.mark.parametrize("interpolation", INTERPOLATION_MODES) + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_video], + ) + def test_interpolation_int(self, interpolation, make_input): + input = make_input(self.INPUT_SIZE) + + # `InterpolationMode.NEAREST_EXACT` has no proper corresponding integer equivalent. Internally, we map it to + # `0` to be the same as `InterpolationMode.NEAREST` for PIL. However, for the tensor backend there is a + # difference and thus we don't test it here. + if isinstance(input, torch.Tensor) and interpolation is transforms.InterpolationMode.NEAREST_EXACT: + return + + expected = F.resize(input, size=self.OUTPUT_SIZES[0], interpolation=interpolation, antialias=True) + actual = F.resize( + input, size=self.OUTPUT_SIZES[0], interpolation=pil_modes_mapping[interpolation], antialias=True + ) + + assert_equal(actual, expected) + + def test_transform_unknown_size_error(self): + with pytest.raises(ValueError, match="size can either be an integer or a list or tuple of one or two integers"): + transforms.Resize(size=object()) + + @pytest.mark.parametrize( + "size", [min(INPUT_SIZE), [min(INPUT_SIZE)], (min(INPUT_SIZE),), list(INPUT_SIZE), tuple(INPUT_SIZE)] + ) + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], + ) + def test_noop(self, size, make_input): + input = make_input(self.INPUT_SIZE) + + output = F.resize(input, size=F.get_spatial_size(input), antialias=True) + + # This identity check is not a requirement. It is here to avoid breaking the behavior by accident. If there + # is a good reason to break this, feel free to downgrade to an equality check. 
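+        # As an illustration (standalone sketch, not executed here): two tensors can be
+        # distinct Python objects while sharing one storage, which is exactly what the
+        # `data_ptr()` comparison below detects:
+        #
+        #     t = torch.rand(3)
+        #     v = t.view(3)
+        #     v is t                          # False, different Python objects
+        #     v.data_ptr() == t.data_ptr()    # True, same underlying memory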
+ if isinstance(input, datapoints._datapoint.Datapoint): + # We can't test identity directly, since that checks for the identity of the Python object. Since all + # datapoints unwrap before a kernel and wrap again afterwards, the Python object changes. Thus, we check + # that the underlying storage is the same + assert output.data_ptr() == input.data_ptr() + else: + assert output is input + + @pytest.mark.parametrize( + "make_input", + [ + make_image_tensor, + make_image_pil, + make_image, + make_bounding_box, + make_segmentation_mask, + make_detection_mask, + make_video, + ], + ) + def test_no_regression_5405(self, make_input): + # Checks that `max_size` is not ignored if `size == small_edge_size` + # See https://github.com/pytorch/vision/issues/5405 + + input = make_input(self.INPUT_SIZE) + + size = min(F.get_spatial_size(input)) + max_size = size + 1 + output = F.resize(input, size=size, max_size=max_size, antialias=True) + + assert max(F.get_spatial_size(output)) == max_size + + +class TestHorizontalFlip: + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_tensor(self, dtype, device): + check_kernel(F.horizontal_flip_image_tensor, make_image(dtype=dtype, device=device)) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_box(self, format, dtype, device): + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) + check_kernel( + F.horizontal_flip_bounding_box, + bounding_box, + format=format, + spatial_size=bounding_box.spatial_size, + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.horizontal_flip_mask, make_mask()) + + def test_kernel_video(self): + check_kernel(F.horizontal_flip_video, make_video()) + + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.horizontal_flip_image_tensor, make_image_tensor), + (F.horizontal_flip_image_pil, make_image_pil), + (F.horizontal_flip_image_tensor, make_image), + (F.horizontal_flip_bounding_box, make_bounding_box), + (F.horizontal_flip_mask, make_segmentation_mask), + (F.horizontal_flip_video, make_video), + ], + ) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.horizontal_flip, kernel, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.horizontal_flip_image_tensor, torch.Tensor), + (F.horizontal_flip_image_pil, PIL.Image.Image), + (F.horizontal_flip_image_tensor, datapoints.Image), + (F.horizontal_flip_bounding_box, datapoints.BoundingBox), + (F.horizontal_flip_mask, datapoints.Mask), + (F.horizontal_flip_video, datapoints.Video), + ], + ) + def test_dispatcher_signature(self, kernel, input_type): + check_dispatcher_signatures_match(F.horizontal_flip, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + check_transform(transforms.RandomHorizontalFlip, make_input(device=device), p=1) + + @pytest.mark.parametrize( + "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] + ) + def test_image_correctness(self, fn): + image = 
make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image) + expected = F.to_image_tensor(F.horizontal_flip(F.to_image_pil(image))) + + torch.testing.assert_close(actual, expected) + + def _reference_horizontal_flip_bounding_box(self, bounding_box): + affine_matrix = np.array( + [ + [-1, 0, bounding_box.spatial_size[1]], + [0, 1, 0], + ], + dtype="float64" if bounding_box.dtype == torch.float64 else "float32", + ) + + expected_bboxes = reference_affine_bounding_box_helper( + bounding_box, + format=bounding_box.format, + spatial_size=bounding_box.spatial_size, + affine_matrix=affine_matrix, + ) + + return datapoints.BoundingBox.wrap_like(bounding_box, expected_bboxes) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize( + "fn", [F.horizontal_flip, transform_cls_to_functional(transforms.RandomHorizontalFlip, p=1)] + ) + def test_bounding_box_correctness(self, format, fn): + bounding_box = make_bounding_box(format=format) + + actual = fn(bounding_box) + expected = self._reference_horizontal_flip_bounding_box(bounding_box) + + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform_noop(self, make_input, device): + input = make_input(device=device) + + transform = transforms.RandomHorizontalFlip(p=0) + + output = transform(input) + + assert_equal(output, input) + + +class TestAffine: + _EXHAUSTIVE_TYPE_AFFINE_KWARGS = dict( + # float, int + angle=[-10.9, 18], + # two-list of float, two-list of int, two-tuple of float, two-tuple of int + translate=[[6.3, -0.6], [1, -3], (16.6, -6.6), (-2, 4)], + # float + scale=[0.5], + # float, int, + # one-list of float, one-list of int, one-tuple of float, one-tuple of int + # two-list of float, two-list of int, two-tuple of float, two-tuple of int + shear=[35.6, 38, [-37.7], [-23], (5.3,), (-52,), [5.4, 21.8], [-47, 51], (-11.2, 36.7), (8, -53)], + # None + # two-list of float, two-list of int, two-tuple of float, two-tuple of int + center=[None, [1.2, 4.9], [-3, 1], (2.5, -4.7), (3, 2)], + ) + # The special case for shear makes sure we pick a value that is supported while JIT scripting + _MINIMAL_AFFINE_KWARGS = { + k: vs[0] if k != "shear" else next(v for v in vs if isinstance(v, list)) + for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items() + } + _CORRECTNESS_AFFINE_KWARGS = { + k: [v for v in vs if v is None or isinstance(v, float) or (isinstance(v, list) and len(v) > 1)] + for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items() + } + + _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES = dict( + degrees=[30, (-15, 20)], + translate=[None, (0.5, 0.5)], + scale=[None, (0.75, 1.25)], + shear=[None, (12, 30, -17, 5), 10, (-5, 12)], + ) + _CORRECTNESS_TRANSFORM_AFFINE_RANGES = { + k: next(v for v in vs if v is not None) for k, vs in _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES.items() + } + + def _check_kernel(self, kernel, input, *args, **kwargs): + kwargs_ = self._MINIMAL_AFFINE_KWARGS.copy() + kwargs_.update(kwargs) + check_kernel(kernel, input, *args, **kwargs_) + + @param_value_parametrization( + angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"], + translate=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["translate"], + shear=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["shear"], + center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], + interpolation=[transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR], + 
fill=EXHAUSTIVE_TYPE_FILLS, + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_tensor(self, param, value, dtype, device): + if param == "fill": + value = adapt_fill(value, dtype=dtype) + self._check_kernel( + F.affine_image_tensor, + make_image(dtype=dtype, device=device), + **{param: value}, + check_scripted_vs_eager=not (param in {"shear", "fill"} and isinstance(value, (int, float))), + check_cuda_vs_cpu=dict(atol=1, rtol=0) + if dtype is torch.uint8 and param == "interpolation" and value is transforms.InterpolationMode.BILINEAR + else True, + ) + + @param_value_parametrization( + angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"], + translate=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["translate"], + shear=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["shear"], + center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], + ) + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_box(self, param, value, format, dtype, device): + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) + self._check_kernel( + F.affine_bounding_box, + bounding_box, + format=format, + spatial_size=bounding_box.spatial_size, + **{param: value}, + check_scripted_vs_eager=not (param == "shear" and isinstance(value, (int, float))), + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + self._check_kernel(F.affine_mask, make_mask()) + + def test_kernel_video(self): + self._check_kernel(F.affine_video, make_video()) + + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.affine_image_tensor, make_image_tensor), + (F.affine_image_pil, make_image_pil), + (F.affine_image_tensor, make_image), + (F.affine_bounding_box, make_bounding_box), + (F.affine_mask, make_segmentation_mask), + (F.affine_video, make_video), + ], + ) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.affine, kernel, make_input(), **self._MINIMAL_AFFINE_KWARGS) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.affine_image_tensor, torch.Tensor), + (F.affine_image_pil, PIL.Image.Image), + (F.affine_image_tensor, datapoints.Image), + (F.affine_bounding_box, datapoints.BoundingBox), + (F.affine_mask, datapoints.Mask), + (F.affine_video, datapoints.Video), + ], + ) + def test_dispatcher_signature(self, kernel, input_type): + check_dispatcher_signatures_match(F.affine, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + input = make_input(device=device) + + check_transform(transforms.RandomAffine, input, **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES) + + @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"]) + @pytest.mark.parametrize("translate", _CORRECTNESS_AFFINE_KWARGS["translate"]) + @pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"]) + @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"]) + @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + @pytest.mark.parametrize( + "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR] + ) + 
@pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
+    def test_functional_image_correctness(self, angle, translate, scale, shear, center, interpolation, fill):
+        image = make_image(dtype=torch.uint8, device="cpu")
+
+        fill = adapt_fill(fill, dtype=torch.uint8)
+
+        actual = F.affine(
+            image,
+            angle=angle,
+            translate=translate,
+            scale=scale,
+            shear=shear,
+            center=center,
+            interpolation=interpolation,
+            fill=fill,
+        )
+        expected = F.to_image_tensor(
+            F.affine(
+                F.to_image_pil(image),
+                angle=angle,
+                translate=translate,
+                scale=scale,
+                shear=shear,
+                center=center,
+                interpolation=interpolation,
+                fill=fill,
+            )
+        )
+
+        mae = (actual.float() - expected.float()).abs().mean()
+        assert mae < (2 if interpolation is transforms.InterpolationMode.NEAREST else 8)
+
+    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
+    @pytest.mark.parametrize(
+        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
+    )
+    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
+    @pytest.mark.parametrize("seed", list(range(5)))
+    def test_transform_image_correctness(self, center, interpolation, fill, seed):
+        image = make_image(dtype=torch.uint8, device="cpu")
+
+        fill = adapt_fill(fill, dtype=torch.uint8)
+
+        transform = transforms.RandomAffine(
+            **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, center=center, interpolation=interpolation, fill=fill
+        )
+
+        torch.manual_seed(seed)
+        actual = transform(image)
+
+        torch.manual_seed(seed)
+        expected = F.to_image_tensor(transform(F.to_image_pil(image)))
+
+        mae = (actual.float() - expected.float()).abs().mean()
+        assert mae < (2 if interpolation is transforms.InterpolationMode.NEAREST else 8)
+
+    def _compute_affine_matrix(self, *, angle, translate, scale, shear, center):
+        rot = math.radians(angle)
+        cx, cy = center
+        tx, ty = translate
+        sx, sy = [math.radians(s) for s in ([shear, 0.0] if isinstance(shear, (int, float)) else shear)]
+
+        c_matrix = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]])
+        t_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
+        c_matrix_inv = np.linalg.inv(c_matrix)
+        rs_matrix = np.array(
+            [
+                [scale * math.cos(rot), -scale * math.sin(rot), 0],
+                [scale * math.sin(rot), scale * math.cos(rot), 0],
+                [0, 0, 1],
+            ]
+        )
+        shear_x_matrix = np.array([[1, -math.tan(sx), 0], [0, 1, 0], [0, 0, 1]])
+        shear_y_matrix = np.array([[1, 0, 0], [-math.tan(sy), 1, 0], [0, 0, 1]])
+        rss_matrix = np.matmul(rs_matrix, np.matmul(shear_y_matrix, shear_x_matrix))
+        true_matrix = np.matmul(t_matrix, np.matmul(c_matrix, np.matmul(rss_matrix, c_matrix_inv)))
+        return true_matrix
+
+    def _reference_affine_bounding_box(self, bounding_box, *, angle, translate, scale, shear, center):
+        if center is None:
+            center = [s * 0.5 for s in bounding_box.spatial_size[::-1]]
+
+        affine_matrix = self._compute_affine_matrix(
+            angle=angle, translate=translate, scale=scale, shear=shear, center=center
+        )
+        affine_matrix = affine_matrix[:2, :]
+
+        expected_bboxes = reference_affine_bounding_box_helper(
+            bounding_box,
+            format=bounding_box.format,
+            spatial_size=bounding_box.spatial_size,
+            affine_matrix=affine_matrix,
+        )
+
+        return expected_bboxes
+
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
+    @pytest.mark.parametrize("translate", _CORRECTNESS_AFFINE_KWARGS["translate"])
+    @pytest.mark.parametrize("scale", _CORRECTNESS_AFFINE_KWARGS["scale"])
+    @pytest.mark.parametrize("shear", _CORRECTNESS_AFFINE_KWARGS["shear"])
+
@pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + def test_functional_bounding_box_correctness(self, format, angle, translate, scale, shear, center): + bounding_box = make_bounding_box(format=format) + + actual = F.affine( + bounding_box, + angle=angle, + translate=translate, + scale=scale, + shear=shear, + center=center, + ) + expected = self._reference_affine_bounding_box( + bounding_box, + angle=angle, + translate=translate, + scale=scale, + shear=shear, + center=center, + ) + + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"]) + @pytest.mark.parametrize("seed", list(range(5))) + def test_transform_bounding_box_correctness(self, format, center, seed): + bounding_box = make_bounding_box(format=format) + + transform = transforms.RandomAffine(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, center=center) + + torch.manual_seed(seed) + params = transform._get_params([bounding_box]) + + torch.manual_seed(seed) + actual = transform(bounding_box) + + expected = self._reference_affine_bounding_box(bounding_box, **params, center=center) + + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize("degrees", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["degrees"]) + @pytest.mark.parametrize("translate", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["translate"]) + @pytest.mark.parametrize("scale", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["scale"]) + @pytest.mark.parametrize("shear", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["shear"]) + @pytest.mark.parametrize("seed", list(range(10))) + def test_transform_get_params_bounds(self, degrees, translate, scale, shear, seed): + image = make_image() + height, width = F.get_spatial_size(image) + + transform = transforms.RandomAffine(degrees=degrees, translate=translate, scale=scale, shear=shear) + + torch.manual_seed(seed) + params = transform._get_params([image]) + + if isinstance(degrees, (int, float)): + assert -degrees <= params["angle"] <= degrees + else: + assert degrees[0] <= params["angle"] <= degrees[1] + + if translate is not None: + width_max = int(round(translate[0] * width)) + height_max = int(round(translate[1] * height)) + assert -width_max <= params["translate"][0] <= width_max + assert -height_max <= params["translate"][1] <= height_max + else: + assert params["translate"] == (0, 0) + + if scale is not None: + assert scale[0] <= params["scale"] <= scale[1] + else: + assert params["scale"] == 1.0 + + if shear is not None: + if isinstance(shear, (int, float)): + assert -shear <= params["shear"][0] <= shear + assert params["shear"][1] == 0.0 + elif len(shear) == 2: + assert shear[0] <= params["shear"][0] <= shear[1] + assert params["shear"][1] == 0.0 + elif len(shear) == 4: + assert shear[0] <= params["shear"][0] <= shear[1] + assert shear[2] <= params["shear"][1] <= shear[3] + else: + assert params["shear"] == (0, 0) + + @pytest.mark.parametrize("param", ["degrees", "translate", "scale", "shear", "center"]) + @pytest.mark.parametrize("value", [0, [0], [0, 0, 0]]) + def test_transform_sequence_len_errors(self, param, value): + if param in {"degrees", "shear"} and not isinstance(value, list): + return + + kwargs = {param: value} + if param != "degrees": + kwargs["degrees"] = 0 + + with pytest.raises( + ValueError if isinstance(value, list) else TypeError, match=f"{param} should be a sequence of length 2" + ): + transforms.RandomAffine(**kwargs) + + def 
test_transform_negative_degrees_error(self): + with pytest.raises(ValueError, match="If degrees is a single number, it must be positive"): + transforms.RandomAffine(degrees=-1) + + @pytest.mark.parametrize("translate", [[-1, 0], [2, 0], [-1, 2]]) + def test_transform_translate_range_error(self, translate): + with pytest.raises(ValueError, match="translation values should be between 0 and 1"): + transforms.RandomAffine(degrees=0, translate=translate) + + @pytest.mark.parametrize("scale", [[-1, 0], [0, -1], [-1, -1]]) + def test_transform_scale_range_error(self, scale): + with pytest.raises(ValueError, match="scale values should be positive"): + transforms.RandomAffine(degrees=0, scale=scale) + + def test_transform_negative_shear_error(self): + with pytest.raises(ValueError, match="If shear is a single number, it must be positive"): + transforms.RandomAffine(degrees=0, shear=-1) + + def test_transform_unknown_fill_error(self): + with pytest.raises(TypeError, match="Got inappropriate fill arg"): + transforms.RandomAffine(degrees=0, fill="fill") + + +class TestVerticalFlip: + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_tensor(self, dtype, device): + check_kernel(F.vertical_flip_image_tensor, make_image(dtype=dtype, device=device)) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.int64]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_box(self, format, dtype, device): + bounding_box = make_bounding_box(format=format, dtype=dtype, device=device) + check_kernel( + F.vertical_flip_bounding_box, + bounding_box, + format=format, + spatial_size=bounding_box.spatial_size, + ) + + @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask]) + def test_kernel_mask(self, make_mask): + check_kernel(F.vertical_flip_mask, make_mask()) + + def test_kernel_video(self): + check_kernel(F.vertical_flip_video, make_video()) + + @pytest.mark.parametrize( + ("kernel", "make_input"), + [ + (F.vertical_flip_image_tensor, make_image_tensor), + (F.vertical_flip_image_pil, make_image_pil), + (F.vertical_flip_image_tensor, make_image), + (F.vertical_flip_bounding_box, make_bounding_box), + (F.vertical_flip_mask, make_segmentation_mask), + (F.vertical_flip_video, make_video), + ], + ) + def test_dispatcher(self, kernel, make_input): + check_dispatcher(F.vertical_flip, kernel, make_input()) + + @pytest.mark.parametrize( + ("kernel", "input_type"), + [ + (F.vertical_flip_image_tensor, torch.Tensor), + (F.vertical_flip_image_pil, PIL.Image.Image), + (F.vertical_flip_image_tensor, datapoints.Image), + (F.vertical_flip_bounding_box, datapoints.BoundingBox), + (F.vertical_flip_mask, datapoints.Mask), + (F.vertical_flip_video, datapoints.Video), + ], + ) + def test_dispatcher_signature(self, kernel, input_type): + check_dispatcher_signatures_match(F.vertical_flip, kernel=kernel, input_type=input_type) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform(self, make_input, device): + check_transform(transforms.RandomVerticalFlip, make_input(device=device), p=1) + + @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) + def test_image_correctness(self, fn): + image 
= make_image(dtype=torch.uint8, device="cpu") + + actual = fn(image) + expected = F.to_image_tensor(F.vertical_flip(F.to_image_pil(image))) + + torch.testing.assert_close(actual, expected) + + def _reference_vertical_flip_bounding_box(self, bounding_box): + affine_matrix = np.array( + [ + [1, 0, 0], + [0, -1, bounding_box.spatial_size[0]], + ], + dtype="float64" if bounding_box.dtype == torch.float64 else "float32", + ) + + expected_bboxes = reference_affine_bounding_box_helper( + bounding_box, + format=bounding_box.format, + spatial_size=bounding_box.spatial_size, + affine_matrix=affine_matrix, + ) + + return datapoints.BoundingBox.wrap_like(bounding_box, expected_bboxes) + + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("fn", [F.vertical_flip, transform_cls_to_functional(transforms.RandomVerticalFlip, p=1)]) + def test_bounding_box_correctness(self, format, fn): + bounding_box = make_bounding_box(format=format) + + actual = fn(bounding_box) + expected = self._reference_vertical_flip_bounding_box(bounding_box) + + torch.testing.assert_close(actual, expected) + + @pytest.mark.parametrize( + "make_input", + [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video], + ) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_transform_noop(self, make_input, device): + input = make_input(device=device) + + transform = transforms.RandomVerticalFlip(p=0) + + output = transform(input) + + assert_equal(output, input) + + +class TestRotate: + _EXHAUSTIVE_TYPE_AFFINE_KWARGS = dict( + # float, int + angle=[-10.9, 18], + # None + # two-list of float, two-list of int, two-tuple of float, two-tuple of int + center=[None, [1.2, 4.9], [-3, 1], (2.5, -4.7), (3, 2)], + ) + _MINIMAL_AFFINE_KWARGS = {k: vs[0] for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items()} + _CORRECTNESS_AFFINE_KWARGS = { + k: [v for v in vs if v is None or isinstance(v, float) or isinstance(v, list)] + for k, vs in _EXHAUSTIVE_TYPE_AFFINE_KWARGS.items() + } + + _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES = dict( + degrees=[30, (-15, 20)], + ) + _CORRECTNESS_TRANSFORM_AFFINE_RANGES = {k: vs[0] for k, vs in _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES.items()} + + @param_value_parametrization( + angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"], + interpolation=[transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR], + expand=[False, True], + center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], + fill=EXHAUSTIVE_TYPE_FILLS, + ) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_image_tensor(self, param, value, dtype, device): + kwargs = {param: value} + if param != "angle": + kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] + check_kernel( + F.rotate_image_tensor, + make_image(dtype=dtype, device=device), + **kwargs, + check_scripted_vs_eager=not (param == "fill" and isinstance(value, (int, float))), + ) + + @param_value_parametrization( + angle=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["angle"], + expand=[False, True], + center=_EXHAUSTIVE_TYPE_AFFINE_KWARGS["center"], + ) + @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat)) + @pytest.mark.parametrize("dtype", [torch.float32, torch.uint8]) + @pytest.mark.parametrize("device", cpu_and_cuda()) + def test_kernel_bounding_box(self, param, value, format, dtype, device): + kwargs = {param: value} + if param != "angle": + kwargs["angle"] = self._MINIMAL_AFFINE_KWARGS["angle"] + + 
bounding_box = make_bounding_box(format=format, dtype=dtype, device=device)
+
+        check_kernel(
+            F.rotate_bounding_box,
+            bounding_box,
+            format=format,
+            spatial_size=bounding_box.spatial_size,
+            **kwargs,
+        )
+
+    @pytest.mark.parametrize("make_mask", [make_segmentation_mask, make_detection_mask])
+    def test_kernel_mask(self, make_mask):
+        check_kernel(F.rotate_mask, make_mask(), **self._MINIMAL_AFFINE_KWARGS)
+
+    def test_kernel_video(self):
+        check_kernel(F.rotate_video, make_video(), **self._MINIMAL_AFFINE_KWARGS)
+
+    @pytest.mark.parametrize(
+        ("kernel", "make_input"),
+        [
+            (F.rotate_image_tensor, make_image_tensor),
+            (F.rotate_image_pil, make_image_pil),
+            (F.rotate_image_tensor, make_image),
+            (F.rotate_bounding_box, make_bounding_box),
+            (F.rotate_mask, make_segmentation_mask),
+            (F.rotate_video, make_video),
+        ],
+    )
+    def test_dispatcher(self, kernel, make_input):
+        check_dispatcher(F.rotate, kernel, make_input(), **self._MINIMAL_AFFINE_KWARGS)
+
+    @pytest.mark.parametrize(
+        ("kernel", "input_type"),
+        [
+            (F.rotate_image_tensor, torch.Tensor),
+            (F.rotate_image_pil, PIL.Image.Image),
+            (F.rotate_image_tensor, datapoints.Image),
+            (F.rotate_bounding_box, datapoints.BoundingBox),
+            (F.rotate_mask, datapoints.Mask),
+            (F.rotate_video, datapoints.Video),
+        ],
+    )
+    def test_dispatcher_signature(self, kernel, input_type):
+        check_dispatcher_signatures_match(F.rotate, kernel=kernel, input_type=input_type)
+
+    @pytest.mark.parametrize(
+        "make_input",
+        [make_image_tensor, make_image_pil, make_image, make_bounding_box, make_segmentation_mask, make_video],
+    )
+    @pytest.mark.parametrize("device", cpu_and_cuda())
+    def test_transform(self, make_input, device):
+        check_transform(
+            transforms.RandomRotation, make_input(device=device), **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES
+        )
+
+    @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
+    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
+    @pytest.mark.parametrize(
+        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
+    )
+    @pytest.mark.parametrize("expand", [False, True])
+    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
+    def test_functional_image_correctness(self, angle, center, interpolation, expand, fill):
+        image = make_image(dtype=torch.uint8, device="cpu")
+
+        fill = adapt_fill(fill, dtype=torch.uint8)
+
+        actual = F.rotate(image, angle=angle, center=center, interpolation=interpolation, expand=expand, fill=fill)
+        expected = F.to_image_tensor(
+            F.rotate(
+                F.to_image_pil(image), angle=angle, center=center, interpolation=interpolation, expand=expand, fill=fill
+            )
+        )
+
+        mae = (actual.float() - expected.float()).abs().mean()
+        assert mae < (1 if interpolation is transforms.InterpolationMode.NEAREST else 6)
+
+    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
+    @pytest.mark.parametrize(
+        "interpolation", [transforms.InterpolationMode.NEAREST, transforms.InterpolationMode.BILINEAR]
+    )
+    @pytest.mark.parametrize("expand", [False, True])
+    @pytest.mark.parametrize("fill", CORRECTNESS_FILLS)
+    @pytest.mark.parametrize("seed", list(range(5)))
+    def test_transform_image_correctness(self, center, interpolation, expand, fill, seed):
+        image = make_image(dtype=torch.uint8, device="cpu")
+
+        fill = adapt_fill(fill, dtype=torch.uint8)
+
+        transform = transforms.RandomRotation(
+            **self._CORRECTNESS_TRANSFORM_AFFINE_RANGES,
+            center=center,
+            interpolation=interpolation,
+            expand=expand,
+            fill=fill,
+        )
+
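+        # The re-seeding pattern below (sketched here with hypothetical values) makes both
+        # invocations draw identical random parameters, so the tensor path and the PIL
+        # reference see the same rotation:
+        #
+        #     torch.manual_seed(0); a = torch.rand(1)
+        #     torch.manual_seed(0); b = torch.rand(1)
+        #     assert torch.equal(a, b)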
+        torch.manual_seed(seed)
+        actual = transform(image)
+
+        torch.manual_seed(seed)
+        expected = F.to_image_tensor(transform(F.to_image_pil(image)))
+
+        mae = (actual.float() - expected.float()).abs().mean()
+        assert mae < (1 if interpolation is transforms.InterpolationMode.NEAREST else 6)
+
+    def _reference_rotate_bounding_box(self, bounding_box, *, angle, expand, center):
+        # FIXME
+        if expand:
+            raise ValueError("This reference currently does not support expand=True")
+
+        if center is None:
+            center = [s * 0.5 for s in bounding_box.spatial_size[::-1]]
+
+        a = np.cos(angle * np.pi / 180.0)
+        b = np.sin(angle * np.pi / 180.0)
+        cx = center[0]
+        cy = center[1]
+        affine_matrix = np.array(
+            [
+                [a, b, cx - cx * a - b * cy],
+                [-b, a, cy + cx * b - a * cy],
+            ],
+            dtype="float64" if bounding_box.dtype == torch.float64 else "float32",
+        )
+
+        expected_bboxes = reference_affine_bounding_box_helper(
+            bounding_box,
+            format=bounding_box.format,
+            spatial_size=bounding_box.spatial_size,
+            affine_matrix=affine_matrix,
+        )
+
+        return expected_bboxes
+
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    @pytest.mark.parametrize("angle", _CORRECTNESS_AFFINE_KWARGS["angle"])
+    # TODO: add support for expand=True in the reference
+    @pytest.mark.parametrize("expand", [False])
+    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
+    def test_functional_bounding_box_correctness(self, format, angle, expand, center):
+        bounding_box = make_bounding_box(format=format)
+
+        actual = F.rotate(bounding_box, angle=angle, expand=expand, center=center)
+        expected = self._reference_rotate_bounding_box(bounding_box, angle=angle, expand=expand, center=center)
+
+        torch.testing.assert_close(actual, expected)
+
+    @pytest.mark.parametrize("format", list(datapoints.BoundingBoxFormat))
+    # TODO: add support for expand=True in the reference
+    @pytest.mark.parametrize("expand", [False])
+    @pytest.mark.parametrize("center", _CORRECTNESS_AFFINE_KWARGS["center"])
+    @pytest.mark.parametrize("seed", list(range(5)))
+    def test_transform_bounding_box_correctness(self, format, expand, center, seed):
+        bounding_box = make_bounding_box(format=format)
+
+        transform = transforms.RandomRotation(**self._CORRECTNESS_TRANSFORM_AFFINE_RANGES, expand=expand, center=center)
+
+        torch.manual_seed(seed)
+        params = transform._get_params([bounding_box])
+
+        torch.manual_seed(seed)
+        actual = transform(bounding_box)
+
+        expected = self._reference_rotate_bounding_box(bounding_box, **params, expand=expand, center=center)
+
+        torch.testing.assert_close(actual, expected)
+
+    @pytest.mark.parametrize("degrees", _EXHAUSTIVE_TYPE_TRANSFORM_AFFINE_RANGES["degrees"])
+    @pytest.mark.parametrize("seed", list(range(10)))
+    def test_transform_get_params_bounds(self, degrees, seed):
+        transform = transforms.RandomRotation(degrees=degrees)
+
+        torch.manual_seed(seed)
+        params = transform._get_params([])
+
+        if isinstance(degrees, (int, float)):
+            assert -degrees <= params["angle"] <= degrees
+        else:
+            assert degrees[0] <= params["angle"] <= degrees[1]
+
+    @pytest.mark.parametrize("param", ["degrees", "center"])
+    @pytest.mark.parametrize("value", [0, [0], [0, 0, 0]])
+    def test_transform_sequence_len_errors(self, param, value):
+        if param == "degrees" and not isinstance(value, list):
+            return
+
+        kwargs = {param: value}
+        if param != "degrees":
+            kwargs["degrees"] = 0
+
+        with pytest.raises(
+            ValueError if isinstance(value, list) else TypeError, match=f"{param} should be a sequence of length 2"
+        ):
+            transforms.RandomRotation(**kwargs)
+
+    def test_transform_negative_degrees_error(self):
+        with pytest.raises(ValueError, match="If degrees is a single number, it must be positive"):
+            transforms.RandomRotation(degrees=-1)
+
+    def test_transform_unknown_fill_error(self):
+        with pytest.raises(TypeError, match="Got inappropriate fill arg"):
+            transforms.RandomRotation(degrees=0, fill="fill")
diff --git a/test/test_utils.py b/test/test_utils.py
index 32b3db59631..b13bd0f0f5b 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -9,7 +9,7 @@ import torch
 import torchvision.transforms.functional as F
 import torchvision.utils as utils
-from common_utils import assert_equal
+from common_utils import assert_equal, cpu_and_cuda
 from PIL import __version__ as PILLOW_VERSION, Image, ImageColor
@@ -203,12 +203,13 @@ def test_draw_no_boxes():
     ],
 )
 @pytest.mark.parametrize("alpha", (0, 0.5, 0.7, 1))
-def test_draw_segmentation_masks(colors, alpha):
+@pytest.mark.parametrize("device", cpu_and_cuda())
+def test_draw_segmentation_masks(colors, alpha, device):
     """This test makes sure that masks draw their corresponding color where they should"""
     num_masks, h, w = 2, 100, 100
     dtype = torch.uint8
-    img = torch.randint(0, 256, size=(3, h, w), dtype=dtype)
-    masks = torch.randint(0, 2, (num_masks, h, w), dtype=torch.bool)
+    img = torch.randint(0, 256, size=(3, h, w), dtype=dtype, device=device)
+    masks = torch.randint(0, 2, (num_masks, h, w), dtype=torch.bool, device=device)
 
     # For testing we enforce that there's no overlap between the masks. The
     # current behaviour is that the last mask's color will take priority when
@@ -234,7 +235,7 @@ def test_draw_segmentation_masks(colors, alpha):
     for mask, color in zip(masks, colors):
         if isinstance(color, str):
             color = ImageColor.getrgb(color)
-        color = torch.tensor(color, dtype=dtype)
+        color = torch.tensor(color, dtype=dtype, device=device)
 
         if alpha == 1:
             assert (out[:, mask] == color[:, None]).all()
@@ -245,11 +246,12 @@ def test_draw_segmentation_masks(colors, alpha):
             torch.testing.assert_close(out[:, mask], interpolated_color, rtol=0.0, atol=1.0)
 
 
-def test_draw_segmentation_masks_errors():
+@pytest.mark.parametrize("device", cpu_and_cuda())
+def test_draw_segmentation_masks_errors(device):
     h, w = 10, 10
-    masks = torch.randint(0, 2, size=(h, w), dtype=torch.bool)
-    img = torch.randint(0, 256, size=(3, h, w), dtype=torch.uint8)
+    masks = torch.randint(0, 2, size=(h, w), dtype=torch.bool, device=device)
+    img = torch.randint(0, 256, size=(3, h, w), dtype=torch.uint8, device=device)
 
     with pytest.raises(TypeError, match="The image must be a tensor"):
         utils.draw_segmentation_masks(image="Not A Tensor Image", masks=masks)
@@ -281,9 +283,10 @@ def test_draw_segmentation_masks_errors():
         utils.draw_segmentation_masks(image=img, masks=masks, colors=bad_colors)
 
 
-def test_draw_no_segmention_mask():
-    img = torch.full((3, 100, 100), 0, dtype=torch.uint8)
-    masks = torch.full((0, 100, 100), 0, dtype=torch.bool)
+@pytest.mark.parametrize("device", cpu_and_cuda())
+def test_draw_no_segmention_mask(device):
+    img = torch.full((3, 100, 100), 0, dtype=torch.uint8, device=device)
+    masks = torch.full((0, 100, 100), 0, dtype=torch.bool, device=device)
 
     with pytest.warns(UserWarning, match=re.escape("masks doesn't contain any mask. 
No mask was drawn")): res = utils.draw_segmentation_masks(img, masks) # Check that the function didn't change the image diff --git a/test/transforms_v2_dispatcher_infos.py b/test/transforms_v2_dispatcher_infos.py index 1d9dd025254..6f61526f382 100644 --- a/test/transforms_v2_dispatcher_infos.py +++ b/test/transforms_v2_dispatcher_infos.py @@ -138,68 +138,6 @@ def fill_sequence_needs_broadcast(args_kwargs): DISPATCHER_INFOS = [ - DispatcherInfo( - F.horizontal_flip, - kernels={ - datapoints.Image: F.horizontal_flip_image_tensor, - datapoints.Video: F.horizontal_flip_video, - datapoints.BoundingBox: F.horizontal_flip_bounding_box, - datapoints.Mask: F.horizontal_flip_mask, - }, - pil_kernel_info=PILKernelInfo(F.horizontal_flip_image_pil, kernel_name="horizontal_flip_image_pil"), - ), - DispatcherInfo( - F.resize, - kernels={ - datapoints.Image: F.resize_image_tensor, - datapoints.Video: F.resize_video, - datapoints.BoundingBox: F.resize_bounding_box, - datapoints.Mask: F.resize_mask, - }, - pil_kernel_info=PILKernelInfo(F.resize_image_pil), - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), - DispatcherInfo( - F.affine, - kernels={ - datapoints.Image: F.affine_image_tensor, - datapoints.Video: F.affine_video, - datapoints.BoundingBox: F.affine_bounding_box, - datapoints.Mask: F.affine_mask, - }, - pil_kernel_info=PILKernelInfo(F.affine_image_pil), - test_marks=[ - *xfails_pil_if_fill_sequence_needs_broadcast, - xfail_jit_python_scalar_arg("shear"), - xfail_jit_python_scalar_arg("fill"), - ], - ), - DispatcherInfo( - F.vertical_flip, - kernels={ - datapoints.Image: F.vertical_flip_image_tensor, - datapoints.Video: F.vertical_flip_video, - datapoints.BoundingBox: F.vertical_flip_bounding_box, - datapoints.Mask: F.vertical_flip_mask, - }, - pil_kernel_info=PILKernelInfo(F.vertical_flip_image_pil, kernel_name="vertical_flip_image_pil"), - ), - DispatcherInfo( - F.rotate, - kernels={ - datapoints.Image: F.rotate_image_tensor, - datapoints.Video: F.rotate_video, - datapoints.BoundingBox: F.rotate_bounding_box, - datapoints.Mask: F.rotate_mask, - }, - pil_kernel_info=PILKernelInfo(F.rotate_image_pil), - test_marks=[ - xfail_jit_python_scalar_arg("fill"), - *xfails_pil_if_fill_sequence_needs_broadcast, - ], - ), DispatcherInfo( F.crop, kernels={ diff --git a/test/transforms_v2_kernel_infos.py b/test/transforms_v2_kernel_infos.py index 7b877fb092d..dc04fbfc7a9 100644 --- a/test/transforms_v2_kernel_infos.py +++ b/test/transforms_v2_kernel_infos.py @@ -1,7 +1,6 @@ import decimal import functools import itertools -import math import numpy as np import PIL.Image @@ -12,6 +11,7 @@ from common_utils import ( ArgsKwargs, combinations_grid, + DEFAULT_PORTRAIT_SPATIAL_SIZE, get_num_channels, ImageLoader, InfoBase, @@ -156,301 +156,6 @@ def xfail_jit_python_scalar_arg(name, *, reason=None): KERNEL_INFOS = [] -def sample_inputs_horizontal_flip_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], dtypes=[torch.float32]): - yield ArgsKwargs(image_loader) - - -def reference_inputs_horizontal_flip_image_tensor(): - for image_loader in make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]): - yield ArgsKwargs(image_loader) - - -def sample_inputs_horizontal_flip_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders( - formats=[datapoints.BoundingBoxFormat.XYXY], dtypes=[torch.float32] - ): - yield ArgsKwargs( - bounding_box_loader, format=bounding_box_loader.format, spatial_size=bounding_box_loader.spatial_size - ) - - -def sample_inputs_horizontal_flip_mask(): 
- for image_loader in make_mask_loaders(sizes=["random"], dtypes=[torch.uint8]): - yield ArgsKwargs(image_loader) - - -def sample_inputs_horizontal_flip_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): - yield ArgsKwargs(video_loader) - - -def reference_horizontal_flip_bounding_box(bounding_box, *, format, spatial_size): - affine_matrix = np.array( - [ - [-1, 0, spatial_size[1]], - [0, 1, 0], - ], - dtype="float64" if bounding_box.dtype == torch.float64 else "float32", - ) - - expected_bboxes = reference_affine_bounding_box_helper( - bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix - ) - - return expected_bboxes - - -def reference_inputs_flip_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders(extra_dims=[()]): - yield ArgsKwargs( - bounding_box_loader, - format=bounding_box_loader.format, - spatial_size=bounding_box_loader.spatial_size, - ) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.horizontal_flip_image_tensor, - kernel_name="horizontal_flip_image_tensor", - sample_inputs_fn=sample_inputs_horizontal_flip_image_tensor, - reference_fn=pil_reference_wrapper(F.horizontal_flip_image_pil), - reference_inputs_fn=reference_inputs_horizontal_flip_image_tensor, - float32_vs_uint8=True, - ), - KernelInfo( - F.horizontal_flip_bounding_box, - sample_inputs_fn=sample_inputs_horizontal_flip_bounding_box, - reference_fn=reference_horizontal_flip_bounding_box, - reference_inputs_fn=reference_inputs_flip_bounding_box, - ), - KernelInfo( - F.horizontal_flip_mask, - sample_inputs_fn=sample_inputs_horizontal_flip_mask, - ), - KernelInfo( - F.horizontal_flip_video, - sample_inputs_fn=sample_inputs_horizontal_flip_video, - ), - ] -) - - -def _get_resize_sizes(spatial_size): - height, width = spatial_size - length = max(spatial_size) - yield length - yield [length] - yield (length,) - new_height = int(height * 0.75) - new_width = int(width * 1.25) - yield [new_height, new_width] - yield height, width - - -def sample_inputs_resize_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32]): - for size in _get_resize_sizes(image_loader.spatial_size): - yield ArgsKwargs(image_loader, size=size) - - for image_loader, interpolation in itertools.product( - make_image_loaders(sizes=["random"], color_spaces=["RGB"]), - [F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR], - ): - yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation) - - yield ArgsKwargs(make_image_loader(size=(11, 17)), size=20, max_size=25) - - -def sample_inputs_resize_image_tensor_bicubic(): - for image_loader, interpolation in itertools.product( - make_image_loaders(sizes=["random"], color_spaces=["RGB"]), [F.InterpolationMode.BICUBIC] - ): - yield ArgsKwargs(image_loader, size=[min(image_loader.spatial_size) + 1], interpolation=interpolation) - - -@pil_reference_wrapper -def reference_resize_image_tensor(*args, **kwargs): - if not kwargs.pop("antialias", False) and kwargs.get("interpolation", F.InterpolationMode.BILINEAR) in { - F.InterpolationMode.BILINEAR, - F.InterpolationMode.BICUBIC, - }: - raise pytest.UsageError("Anti-aliasing is always active in PIL") - return F.resize_image_pil(*args, **kwargs) - - -def reference_inputs_resize_image_tensor(): - for image_loader, interpolation in itertools.product( - make_image_loaders_for_interpolation(), - [ - F.InterpolationMode.NEAREST, - F.InterpolationMode.NEAREST_EXACT, - 
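The removed `reference_horizontal_flip_bounding_box` encodes the flip as the affine map x' = W - x, i.e. the matrix [[-1, 0, W], [0, 1, 0]]. A standalone sketch of that pattern (not the suite's `reference_affine_bounding_box_helper`; `flip_boxes_xyxy` is a hypothetical name), assuming NumPy and XYXY boxes:

```python
import numpy as np

def flip_boxes_xyxy(boxes, spatial_size):  # hypothetical standalone helper
    height, width = spatial_size
    affine = np.array([[-1.0, 0.0, width], [0.0, 1.0, 0.0]])
    x1, y1, x2, y2 = boxes.T
    corners = np.stack([x1, y1, x2, y1, x2, y2, x1, y2], axis=-1).reshape(-1, 4, 2)
    ones = np.ones((*corners.shape[:2], 1))
    transformed = np.concatenate([corners, ones], axis=-1) @ affine.T
    # flipping swaps the left and right edges, so re-canonicalize with min/max
    return np.concatenate([transformed.min(axis=1), transformed.max(axis=1)], axis=-1)

boxes = np.array([[2.0, 3.0, 6.0, 8.0]])
print(flip_boxes_xyxy(boxes, spatial_size=(10, 10)))  # [[4. 3. 8. 8.]]
```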
F.InterpolationMode.BILINEAR, - F.InterpolationMode.BICUBIC, - ], - ): - for size in _get_resize_sizes(image_loader.spatial_size): - yield ArgsKwargs( - image_loader, - size=size, - interpolation=interpolation, - antialias=interpolation - in { - F.InterpolationMode.BILINEAR, - F.InterpolationMode.BICUBIC, - }, - ) - - -def sample_inputs_resize_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders(): - for size in _get_resize_sizes(bounding_box_loader.spatial_size): - yield ArgsKwargs(bounding_box_loader, spatial_size=bounding_box_loader.spatial_size, size=size) - - -def sample_inputs_resize_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): - yield ArgsKwargs(mask_loader, size=[min(mask_loader.shape[-2:]) + 1]) - - -def sample_inputs_resize_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): - yield ArgsKwargs(video_loader, size=[min(video_loader.shape[-2:]) + 1]) - - -def reference_resize_bounding_box(bounding_box, *, spatial_size, size, max_size=None): - old_height, old_width = spatial_size - new_height, new_width = F._geometry._compute_resized_output_size(spatial_size, size=size, max_size=max_size) - - if (old_height, old_width) == (new_height, new_width): - return bounding_box, (old_height, old_width) - - affine_matrix = np.array( - [ - [new_width / old_width, 0, 0], - [0, new_height / old_height, 0], - ], - dtype="float64" if bounding_box.dtype == torch.float64 else "float32", - ) - - expected_bboxes = reference_affine_bounding_box_helper( - bounding_box, - format=bounding_box.format, - spatial_size=(new_height, new_width), - affine_matrix=affine_matrix, - ) - return expected_bboxes, (new_height, new_width) - - -def reference_inputs_resize_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders(extra_dims=((), (4,))): - for size in _get_resize_sizes(bounding_box_loader.spatial_size): - yield ArgsKwargs(bounding_box_loader, size=size, spatial_size=bounding_box_loader.spatial_size) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.resize_image_tensor, - sample_inputs_fn=sample_inputs_resize_image_tensor, - reference_fn=reference_resize_image_tensor, - reference_inputs_fn=reference_inputs_resize_image_tensor, - float32_vs_uint8=True, - closeness_kwargs={ - **pil_reference_pixel_difference(10, mae=True), - **cuda_vs_cpu_pixel_difference(), - **float32_vs_uint8_pixel_difference(1, mae=True), - }, - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), - KernelInfo( - F.resize_image_tensor, - sample_inputs_fn=sample_inputs_resize_image_tensor_bicubic, - reference_fn=reference_resize_image_tensor, - reference_inputs_fn=reference_inputs_resize_image_tensor, - float32_vs_uint8=True, - closeness_kwargs={ - **pil_reference_pixel_difference(10, mae=True), - **cuda_vs_cpu_pixel_difference(atol=30), - **float32_vs_uint8_pixel_difference(1, mae=True), - }, - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), - KernelInfo( - F.resize_bounding_box, - sample_inputs_fn=sample_inputs_resize_bounding_box, - reference_fn=reference_resize_bounding_box, - reference_inputs_fn=reference_inputs_resize_bounding_box, - closeness_kwargs={ - (("TestKernels", "test_against_reference"), torch.int64, "cpu"): dict(atol=1, rtol=0), - }, - test_marks=[ - xfail_jit_python_scalar_arg("size"), - ], - ), - KernelInfo( - F.resize_mask, - sample_inputs_fn=sample_inputs_resize_mask, - closeness_kwargs=pil_reference_pixel_difference(10), - test_marks=[ - 
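`_get_resize_sizes` exercises every accepted spelling of `size`: a bare int or 1-element sequence means "match the shorter edge, keep the aspect ratio", optionally capped by `max_size`. A simplified sketch of the rule `_compute_resized_output_size` applies (hypothetical standalone helper; validation and rounding edge cases omitted), reproducing the `size=20, max_size=25` sample on an (11, 17) image:

```python
def resized_output_size(spatial_size, size, max_size=None):
    height, width = spatial_size
    if isinstance(size, int) or len(size) == 1:  # shorter-edge mode
        short = size if isinstance(size, int) else size[0]
        long_ = int(short * max(height, width) / min(height, width))
        if max_size is not None and long_ > max_size:
            # shrink so the longer edge fits under max_size
            short = int(short * max_size / long_)
            long_ = max_size
        return (short, long_) if height < width else (long_, short)
    return tuple(size)  # explicit (new_height, new_width)

print(resized_output_size((11, 17), 20, max_size=25))  # (16, 25)
```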
xfail_jit_python_scalar_arg("size"), - ], - ), - KernelInfo( - F.resize_video, - sample_inputs_fn=sample_inputs_resize_video, - closeness_kwargs=cuda_vs_cpu_pixel_difference(), - ), - ] -) - - -_AFFINE_KWARGS = combinations_grid( - angle=[-87, 15, 90], - translate=[(5, 5), (-5, -5)], - scale=[0.77, 1.27], - shear=[(12, 12), (0, 0)], -) - - -def _diversify_affine_kwargs_types(affine_kwargs): - angle = affine_kwargs["angle"] - for diverse_angle in [int(angle), float(angle)]: - yield dict(affine_kwargs, angle=diverse_angle) - - shear = affine_kwargs["shear"] - for diverse_shear in [tuple(shear), list(shear), int(shear[0]), float(shear[0])]: - yield dict(affine_kwargs, shear=diverse_shear) - - -def _full_affine_params(**partial_params): - partial_params.setdefault("angle", 0.0) - partial_params.setdefault("translate", [0.0, 0.0]) - partial_params.setdefault("scale", 1.0) - partial_params.setdefault("shear", [0.0, 0.0]) - partial_params.setdefault("center", None) - return partial_params - - -_DIVERSE_AFFINE_PARAMS = [ - _full_affine_params(**{name: arg}) - for name, args in [ - ("angle", [1.0, 2]), - ("translate", [[1.0, 0.5], [1, 2], (1.0, 0.5), (1, 2)]), - ("scale", [0.5]), - ("shear", [1.0, 2, [1.0], [2], (1.0,), (2,), [1.0, 0.5], [1, 2], (1.0, 0.5), (1, 2)]), - ("center", [None, [1.0, 0.5], [1, 2], (1.0, 0.5), (1, 2)]), - ] - for arg in args -] - - def get_fills(*, num_channels, dtype): yield None @@ -481,72 +186,6 @@ def float32_vs_uint8_fill_adapter(other_args, kwargs): return other_args, dict(kwargs, fill=fill) -def sample_inputs_affine_image_tensor(): - make_affine_image_loaders = functools.partial( - make_image_loaders, sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32] - ) - - for image_loader, affine_params in itertools.product(make_affine_image_loaders(), _DIVERSE_AFFINE_PARAMS): - yield ArgsKwargs(image_loader, **affine_params) - - for image_loader in make_affine_image_loaders(): - for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): - yield ArgsKwargs(image_loader, **_full_affine_params(), fill=fill) - - for image_loader, interpolation in itertools.product( - make_affine_image_loaders(), - [ - F.InterpolationMode.NEAREST, - F.InterpolationMode.BILINEAR, - ], - ): - yield ArgsKwargs(image_loader, **_full_affine_params(), fill=0) - - -def reference_inputs_affine_image_tensor(): - for image_loader, affine_kwargs in itertools.product(make_image_loaders_for_interpolation(), _AFFINE_KWARGS): - yield ArgsKwargs( - image_loader, - interpolation=F.InterpolationMode.NEAREST, - **affine_kwargs, - ) - - -def sample_inputs_affine_bounding_box(): - for bounding_box_loader, affine_params in itertools.product( - make_bounding_box_loaders(formats=[datapoints.BoundingBoxFormat.XYXY]), _DIVERSE_AFFINE_PARAMS - ): - yield ArgsKwargs( - bounding_box_loader, - format=bounding_box_loader.format, - spatial_size=bounding_box_loader.spatial_size, - **affine_params, - ) - - -def _compute_affine_matrix(angle, translate, scale, shear, center): - rot = math.radians(angle) - cx, cy = center - tx, ty = translate - sx, sy = [math.radians(sh_) for sh_ in shear] - - c_matrix = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]]) - t_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) - c_matrix_inv = np.linalg.inv(c_matrix) - rs_matrix = np.array( - [ - [scale * math.cos(rot), -scale * math.sin(rot), 0], - [scale * math.sin(rot), scale * math.cos(rot), 0], - [0, 0, 1], - ] - ) - shear_x_matrix = np.array([[1, -math.tan(sx), 0], [0, 1, 0], [0, 0, 1]]) - shear_y_matrix = 
np.array([[1, 0, 0], [-math.tan(sy), 1, 0], [0, 0, 1]]) - rss_matrix = np.matmul(rs_matrix, np.matmul(shear_y_matrix, shear_x_matrix)) - true_matrix = np.matmul(t_matrix, np.matmul(c_matrix, np.matmul(rss_matrix, c_matrix_inv))) - return true_matrix - - def reference_affine_bounding_box_helper(bounding_box, *, format, spatial_size, affine_matrix): def transform(bbox, affine_matrix_, format_, spatial_size_): # Go to float before converting to prevent precision loss in case of CXCYWH -> XYXY and W or H is 1 @@ -597,81 +236,6 @@ def transform(bbox, affine_matrix_, format_, spatial_size_): return expected_bboxes -def reference_affine_bounding_box(bounding_box, *, format, spatial_size, angle, translate, scale, shear, center=None): - if center is None: - center = [s * 0.5 for s in spatial_size[::-1]] - - affine_matrix = _compute_affine_matrix(angle, translate, scale, shear, center) - affine_matrix = affine_matrix[:2, :] - - expected_bboxes = reference_affine_bounding_box_helper( - bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix - ) - - return expected_bboxes - - -def reference_inputs_affine_bounding_box(): - for bounding_box_loader, affine_kwargs in itertools.product( - make_bounding_box_loaders(extra_dims=[()]), - _AFFINE_KWARGS, - ): - yield ArgsKwargs( - bounding_box_loader, - format=bounding_box_loader.format, - spatial_size=bounding_box_loader.spatial_size, - **affine_kwargs, - ) - - -def sample_inputs_affine_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): - yield ArgsKwargs(mask_loader, **_full_affine_params()) - - -def sample_inputs_affine_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): - yield ArgsKwargs(video_loader, **_full_affine_params()) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.affine_image_tensor, - sample_inputs_fn=sample_inputs_affine_image_tensor, - reference_fn=pil_reference_wrapper(F.affine_image_pil), - reference_inputs_fn=reference_inputs_affine_image_tensor, - float32_vs_uint8=True, - closeness_kwargs=pil_reference_pixel_difference(10, mae=True), - test_marks=[ - xfail_jit_python_scalar_arg("shear"), - xfail_jit_python_scalar_arg("fill"), - ], - ), - KernelInfo( - F.affine_bounding_box, - sample_inputs_fn=sample_inputs_affine_bounding_box, - reference_fn=reference_affine_bounding_box, - reference_inputs_fn=reference_inputs_affine_bounding_box, - test_marks=[ - xfail_jit_python_scalar_arg("shear"), - ], - ), - KernelInfo( - F.affine_mask, - sample_inputs_fn=sample_inputs_affine_mask, - test_marks=[ - xfail_jit_python_scalar_arg("shear"), - ], - ), - KernelInfo( - F.affine_video, - sample_inputs_fn=sample_inputs_affine_video, - ), - ] -) - - def sample_inputs_convert_format_bounding_box(): formats = list(datapoints.BoundingBoxFormat) for bounding_box_loader, new_format in itertools.product(make_bounding_box_loaders(formats=formats), formats): @@ -697,205 +261,13 @@ def reference_inputs_convert_format_bounding_box(): reference_fn=reference_convert_format_bounding_box, reference_inputs_fn=reference_inputs_convert_format_bounding_box, logs_usage=True, + closeness_kwargs={ + (("TestKernels", "test_against_reference"), torch.int64, "cpu"): dict(atol=1, rtol=0), + }, ), ) -def sample_inputs_vertical_flip_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], dtypes=[torch.float32]): - yield ArgsKwargs(image_loader) - - -def reference_inputs_vertical_flip_image_tensor(): - for image_loader in 
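The removed `_compute_affine_matrix` composes T @ C @ (RS @ ShearY @ ShearX) @ C^-1: shear, then rotate-scale about `center`, then translate. A re-derivation sketch under the same conventions (`compose_affine` is a hypothetical name, NumPy only) with a fixed-point sanity check: with no translation, `center` must map to itself.

```python
import math
import numpy as np

def compose_affine(angle, translate, scale, shear, center):
    rot = math.radians(angle)
    sx, sy = (math.radians(s) for s in shear)
    cx, cy = center
    tx, ty = translate
    C = np.array([[1, 0, cx], [0, 1, cy], [0, 0, 1]], dtype=np.float64)
    T = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]], dtype=np.float64)
    RS = np.array(
        [
            [scale * math.cos(rot), -scale * math.sin(rot), 0],
            [scale * math.sin(rot), scale * math.cos(rot), 0],
            [0, 0, 1],
        ]
    )
    shear_x = np.array([[1, -math.tan(sx), 0], [0, 1, 0], [0, 0, 1]])
    shear_y = np.array([[1, 0, 0], [-math.tan(sy), 1, 0], [0, 0, 1]])
    return T @ C @ RS @ shear_y @ shear_x @ np.linalg.inv(C)

M = compose_affine(angle=30, translate=(0, 0), scale=1.0, shear=(0, 0), center=(5, 7))
print(np.allclose(M @ np.array([5, 7, 1]), [5, 7, 1]))  # True: center is fixed
```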
make_image_loaders(extra_dims=[()], dtypes=[torch.uint8]): - yield ArgsKwargs(image_loader) - - -def sample_inputs_vertical_flip_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders( - formats=[datapoints.BoundingBoxFormat.XYXY], dtypes=[torch.float32] - ): - yield ArgsKwargs( - bounding_box_loader, format=bounding_box_loader.format, spatial_size=bounding_box_loader.spatial_size - ) - - -def sample_inputs_vertical_flip_mask(): - for image_loader in make_mask_loaders(sizes=["random"], dtypes=[torch.uint8]): - yield ArgsKwargs(image_loader) - - -def sample_inputs_vertical_flip_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): - yield ArgsKwargs(video_loader) - - -def reference_vertical_flip_bounding_box(bounding_box, *, format, spatial_size): - affine_matrix = np.array( - [ - [1, 0, 0], - [0, -1, spatial_size[0]], - ], - dtype="float64" if bounding_box.dtype == torch.float64 else "float32", - ) - - expected_bboxes = reference_affine_bounding_box_helper( - bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix - ) - - return expected_bboxes - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.vertical_flip_image_tensor, - kernel_name="vertical_flip_image_tensor", - sample_inputs_fn=sample_inputs_vertical_flip_image_tensor, - reference_fn=pil_reference_wrapper(F.vertical_flip_image_pil), - reference_inputs_fn=reference_inputs_vertical_flip_image_tensor, - float32_vs_uint8=True, - ), - KernelInfo( - F.vertical_flip_bounding_box, - sample_inputs_fn=sample_inputs_vertical_flip_bounding_box, - reference_fn=reference_vertical_flip_bounding_box, - reference_inputs_fn=reference_inputs_flip_bounding_box, - ), - KernelInfo( - F.vertical_flip_mask, - sample_inputs_fn=sample_inputs_vertical_flip_mask, - ), - KernelInfo( - F.vertical_flip_video, - sample_inputs_fn=sample_inputs_vertical_flip_video, - ), - ] -) - -_ROTATE_ANGLES = [-87, 15, 90] - - -def sample_inputs_rotate_image_tensor(): - make_rotate_image_loaders = functools.partial( - make_image_loaders, sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32] - ) - - for image_loader in make_rotate_image_loaders(): - yield ArgsKwargs(image_loader, angle=15.0, expand=True) - - for image_loader, center in itertools.product( - make_rotate_image_loaders(), [None, [1.0, 0.5], [1, 2], (1.0, 0.5), (1, 2)] - ): - yield ArgsKwargs(image_loader, angle=15.0, center=center) - - for image_loader in make_rotate_image_loaders(): - for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): - yield ArgsKwargs(image_loader, angle=15.0, fill=fill) - - for image_loader, interpolation in itertools.product( - make_rotate_image_loaders(), - [F.InterpolationMode.NEAREST, F.InterpolationMode.BILINEAR], - ): - yield ArgsKwargs(image_loader, angle=15.0, fill=0) - - -def reference_inputs_rotate_image_tensor(): - for image_loader, angle in itertools.product(make_image_loaders_for_interpolation(), _ROTATE_ANGLES): - yield ArgsKwargs(image_loader, angle=angle) - - -def sample_inputs_rotate_bounding_box(): - for bounding_box_loader in make_bounding_box_loaders(): - yield ArgsKwargs( - bounding_box_loader, - format=bounding_box_loader.format, - spatial_size=bounding_box_loader.spatial_size, - angle=_ROTATE_ANGLES[0], - ) - - -def reference_inputs_rotate_bounding_box(): - for bounding_box_loader, angle in itertools.product( - make_bounding_box_loaders(extra_dims=((), (4,))), _ROTATE_ANGLES - ): - yield ArgsKwargs( - bounding_box_loader, - 
format=bounding_box_loader.format, - spatial_size=bounding_box_loader.spatial_size, - angle=angle, - ) - - # TODO: add samples with expand=True and center - - -def reference_rotate_bounding_box(bounding_box, *, format, spatial_size, angle, expand=False, center=None): - - if center is None: - center = [spatial_size[1] * 0.5, spatial_size[0] * 0.5] - - a = np.cos(angle * np.pi / 180.0) - b = np.sin(angle * np.pi / 180.0) - cx = center[0] - cy = center[1] - affine_matrix = np.array( - [ - [a, b, cx - cx * a - b * cy], - [-b, a, cy + cx * b - a * cy], - ], - dtype="float64" if bounding_box.dtype == torch.float64 else "float32", - ) - - expected_bboxes = reference_affine_bounding_box_helper( - bounding_box, format=format, spatial_size=spatial_size, affine_matrix=affine_matrix - ) - return expected_bboxes, spatial_size - - -def sample_inputs_rotate_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): - yield ArgsKwargs(mask_loader, angle=15.0) - - -def sample_inputs_rotate_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): - yield ArgsKwargs(video_loader, angle=15.0) - - -KERNEL_INFOS.extend( - [ - KernelInfo( - F.rotate_image_tensor, - sample_inputs_fn=sample_inputs_rotate_image_tensor, - reference_fn=pil_reference_wrapper(F.rotate_image_pil), - reference_inputs_fn=reference_inputs_rotate_image_tensor, - float32_vs_uint8=True, - closeness_kwargs=pil_reference_pixel_difference(1, mae=True), - test_marks=[ - xfail_jit_python_scalar_arg("fill"), - ], - ), - KernelInfo( - F.rotate_bounding_box, - sample_inputs_fn=sample_inputs_rotate_bounding_box, - reference_fn=reference_rotate_bounding_box, - reference_inputs_fn=reference_inputs_rotate_bounding_box, - closeness_kwargs={ - **scripted_vs_eager_float64_tolerances("cpu", atol=1e-4, rtol=1e-4), - **scripted_vs_eager_float64_tolerances("cuda", atol=1e-4, rtol=1e-4), - }, - ), - KernelInfo( - F.rotate_mask, - sample_inputs_fn=sample_inputs_rotate_mask, - ), - KernelInfo( - F.rotate_video, - sample_inputs_fn=sample_inputs_rotate_video, - ), - ] -) - _CROP_PARAMS = combinations_grid(top=[-8, 0, 9], left=[-8, 0, 9], height=[12, 20], width=[12, 20]) @@ -928,7 +300,7 @@ def sample_inputs_crop_bounding_box(): def sample_inputs_crop_mask(): - for mask_loader in make_mask_loaders(sizes=[(16, 17)], num_categories=["random"], num_objects=["random"]): + for mask_loader in make_mask_loaders(sizes=[(16, 17)], num_categories=[10], num_objects=[5]): yield ArgsKwargs(mask_loader, top=4, left=3, height=7, width=8) @@ -938,7 +310,7 @@ def reference_inputs_crop_mask(): def sample_inputs_crop_video(): - for video_loader in make_video_loaders(sizes=[(16, 17)], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[(16, 17)], num_frames=[3]): yield ArgsKwargs(video_loader, top=4, left=3, height=7, width=8) @@ -1047,7 +419,7 @@ def sample_inputs_resized_crop_mask(): def sample_inputs_resized_crop_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, **_RESIZED_CROP_PARAMS[0]) @@ -1089,7 +461,7 @@ def sample_inputs_resized_crop_video(): def sample_inputs_pad_image_tensor(): make_pad_image_loaders = functools.partial( - make_image_loaders, sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32] + make_image_loaders, sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], 
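The 2x3 matrix in `reference_rotate_bounding_box` above is rotation about `center` written in closed form: with R = [[a, b], [-b, a]] (a = cos(angle), b = sin(angle), counter-clockwise in image coordinates where y points down), the translation column is c - R @ c. A sketch checking the closed form against that composition (`rotate_matrix` is a hypothetical helper, NumPy only):

```python
import numpy as np

def rotate_matrix(angle_deg, center):
    a = np.cos(np.deg2rad(angle_deg))
    b = np.sin(np.deg2rad(angle_deg))
    cx, cy = center
    closed_form = np.array(
        [
            [a, b, cx - cx * a - b * cy],
            [-b, a, cy + cx * b - a * cy],
        ]
    )
    R = np.array([[a, b], [-b, a]])
    c = np.array(center, dtype=np.float64)
    # rotation about a center = translate center to origin, rotate, translate back
    composed = np.hstack([R, (c - R @ c).reshape(2, 1)])
    assert np.allclose(closed_form, composed)
    return closed_form

print(rotate_matrix(90.0, center=(3.0, 2.0)))
```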
dtypes=[torch.float32] ) for image_loader, padding in itertools.product( @@ -1144,7 +516,7 @@ def sample_inputs_pad_bounding_box(): def sample_inputs_pad_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): + for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_categories=[10], num_objects=[5]): yield ArgsKwargs(mask_loader, padding=[1]) @@ -1156,7 +528,7 @@ def reference_inputs_pad_mask(): def sample_inputs_pad_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, padding=[1]) @@ -1252,7 +624,7 @@ def pad_xfail_jit_fill_condition(args_kwargs): def sample_inputs_perspective_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"]): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): yield ArgsKwargs( image_loader, startpoints=None, endpoints=None, fill=fill, coefficients=_PERSPECTIVE_COEFFS[0] @@ -1304,7 +676,7 @@ def sample_inputs_perspective_bounding_box(): def sample_inputs_perspective_mask(): - for mask_loader in make_mask_loaders(sizes=["random"]): + for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): yield ArgsKwargs(mask_loader, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0]) yield ArgsKwargs(make_detection_mask_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS) @@ -1318,7 +690,7 @@ def reference_inputs_perspective_mask(): def sample_inputs_perspective_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, startpoints=None, endpoints=None, coefficients=_PERSPECTIVE_COEFFS[0]) yield ArgsKwargs(make_video_loader(), startpoints=_STARTPOINTS, endpoints=_ENDPOINTS) @@ -1377,7 +749,7 @@ def _get_elastic_displacement(spatial_size): def sample_inputs_elastic_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"]): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): displacement = _get_elastic_displacement(image_loader.spatial_size) for fill in get_fills(num_channels=image_loader.num_channels, dtype=image_loader.dtype): yield ArgsKwargs(image_loader, displacement=displacement, fill=fill) @@ -1409,13 +781,13 @@ def sample_inputs_elastic_bounding_box(): def sample_inputs_elastic_mask(): - for mask_loader in make_mask_loaders(sizes=["random"]): + for mask_loader in make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): displacement = _get_elastic_displacement(mask_loader.shape[-2:]) yield ArgsKwargs(mask_loader, displacement=displacement) def sample_inputs_elastic_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): displacement = _get_elastic_displacement(video_loader.shape[-2:]) yield ArgsKwargs(video_loader, displacement=displacement) @@ -1486,7 +858,7 @@ def sample_inputs_center_crop_bounding_box(): def sample_inputs_center_crop_mask(): - for mask_loader in make_mask_loaders(sizes=["random"], num_categories=["random"], num_objects=["random"]): + for mask_loader in 
make_mask_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_categories=[10], num_objects=[5]): height, width = mask_loader.shape[-2:] yield ArgsKwargs(mask_loader, output_size=(height // 2, width // 2)) @@ -1499,7 +871,7 @@ def reference_inputs_center_crop_mask(): def sample_inputs_center_crop_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): height, width = video_loader.shape[-2:] yield ArgsKwargs(video_loader, output_size=(height // 2, width // 2)) @@ -1579,7 +951,7 @@ def sample_inputs_gaussian_blur_video(): def sample_inputs_equalize_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader) @@ -1640,7 +1012,7 @@ def make_beta_distributed_image(shape, dtype, device, *, alpha, beta, memory_for def sample_inputs_equalize_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader) @@ -1663,7 +1035,7 @@ def sample_inputs_equalize_video(): def sample_inputs_invert_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader) @@ -1673,7 +1045,7 @@ def reference_inputs_invert_image_tensor(): def sample_inputs_invert_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader) @@ -1699,7 +1071,7 @@ def sample_inputs_invert_video(): def sample_inputs_posterize_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, bits=_POSTERIZE_BITS[0]) @@ -1712,7 +1084,7 @@ def reference_inputs_posterize_image_tensor(): def sample_inputs_posterize_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, bits=_POSTERIZE_BITS[0]) @@ -1742,7 +1114,7 @@ def _get_solarize_thresholds(dtype): def sample_inputs_solarize_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, threshold=next(_get_solarize_thresholds(image_loader.dtype))) @@ -1757,7 +1129,7 @@ def uint8_to_float32_threshold_adapter(other_args, kwargs): def sample_inputs_solarize_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, threshold=next(_get_solarize_thresholds(video_loader.dtype))) @@ -1781,7 +1153,7 @@ def sample_inputs_solarize_video(): def sample_inputs_autocontrast_image_tensor(): - for image_loader in 
make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader) @@ -1791,7 +1163,7 @@ def reference_inputs_autocontrast_image_tensor(): def sample_inputs_autocontrast_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader) @@ -1821,7 +1193,7 @@ def sample_inputs_autocontrast_video(): def sample_inputs_adjust_sharpness_image_tensor(): for image_loader in make_image_loaders( - sizes=["random", (2, 2)], + sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE, (2, 2)], color_spaces=("GRAY", "RGB"), ): yield ArgsKwargs(image_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0]) @@ -1836,7 +1208,7 @@ def reference_inputs_adjust_sharpness_image_tensor(): def sample_inputs_adjust_sharpness_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, sharpness_factor=_ADJUST_SHARPNESS_FACTORS[0]) @@ -1860,7 +1232,7 @@ def sample_inputs_adjust_sharpness_video(): def sample_inputs_erase_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"]): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE]): # FIXME: make the parameters more diverse h, w = 6, 7 v = torch.rand(image_loader.num_channels, h, w) @@ -1868,7 +1240,7 @@ def sample_inputs_erase_image_tensor(): def sample_inputs_erase_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): # FIXME: make the parameters more diverse h, w = 6, 7 v = torch.rand(video_loader.num_channels, h, w) @@ -1893,7 +1265,7 @@ def sample_inputs_erase_video(): def sample_inputs_adjust_brightness_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, brightness_factor=_ADJUST_BRIGHTNESS_FACTORS[0]) @@ -1906,7 +1278,7 @@ def reference_inputs_adjust_brightness_image_tensor(): def sample_inputs_adjust_brightness_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, brightness_factor=_ADJUST_BRIGHTNESS_FACTORS[0]) @@ -1933,7 +1305,7 @@ def sample_inputs_adjust_brightness_video(): def sample_inputs_adjust_contrast_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, contrast_factor=_ADJUST_CONTRAST_FACTORS[0]) @@ -1946,7 +1318,7 @@ def reference_inputs_adjust_contrast_image_tensor(): def sample_inputs_adjust_contrast_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, contrast_factor=_ADJUST_CONTRAST_FACTORS[0]) @@ -1985,7 +1357,7 @@ def 
sample_inputs_adjust_contrast_video(): def sample_inputs_adjust_gamma_image_tensor(): gamma, gain = _ADJUST_GAMMA_GAMMAS_GAINS[0] - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, gamma=gamma, gain=gain) @@ -1999,7 +1371,7 @@ def reference_inputs_adjust_gamma_image_tensor(): def sample_inputs_adjust_gamma_video(): gamma, gain = _ADJUST_GAMMA_GAMMAS_GAINS[0] - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, gamma=gamma, gain=gain) @@ -2029,7 +1401,7 @@ def sample_inputs_adjust_gamma_video(): def sample_inputs_adjust_hue_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, hue_factor=_ADJUST_HUE_FACTORS[0]) @@ -2042,7 +1414,7 @@ def reference_inputs_adjust_hue_image_tensor(): def sample_inputs_adjust_hue_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, hue_factor=_ADJUST_HUE_FACTORS[0]) @@ -2071,7 +1443,7 @@ def sample_inputs_adjust_hue_video(): def sample_inputs_adjust_saturation_image_tensor(): - for image_loader in make_image_loaders(sizes=["random"], color_spaces=("GRAY", "RGB")): + for image_loader in make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=("GRAY", "RGB")): yield ArgsKwargs(image_loader, saturation_factor=_ADJUST_SATURATION_FACTORS[0]) @@ -2084,7 +1456,7 @@ def reference_inputs_adjust_saturation_image_tensor(): def sample_inputs_adjust_saturation_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader, saturation_factor=_ADJUST_SATURATION_FACTORS[0]) @@ -2244,7 +1616,7 @@ def wrapper(input_tensor, *other_args, **kwargs): def sample_inputs_normalize_image_tensor(): for image_loader, (mean, std) in itertools.product( - make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[torch.float32]), + make_image_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[torch.float32]), _NORMALIZE_MEANS_STDS, ): yield ArgsKwargs(image_loader, mean=mean, std=std) @@ -2269,7 +1641,7 @@ def reference_inputs_normalize_image_tensor(): def sample_inputs_normalize_video(): mean, std = _NORMALIZE_MEANS_STDS[0] for video_loader in make_video_loaders( - sizes=["random"], color_spaces=["RGB"], num_frames=["random"], dtypes=[torch.float32] + sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], num_frames=[3], dtypes=[torch.float32] ): yield ArgsKwargs(video_loader, mean=mean, std=std) @@ -2303,7 +1675,9 @@ def sample_inputs_convert_dtype_image_tensor(): # conversion cannot be performed safely continue - for image_loader in make_image_loaders(sizes=["random"], color_spaces=["RGB"], dtypes=[input_dtype]): + for image_loader in make_image_loaders( + sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], dtypes=[input_dtype] + ): yield ArgsKwargs(image_loader, dtype=output_dtype) @@ 
-2368,7 +1742,7 @@ def reference_inputs_convert_dtype_image_tensor(): def sample_inputs_convert_dtype_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=["random"]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[3]): yield ArgsKwargs(video_loader) @@ -2413,7 +1787,7 @@ def sample_inputs_convert_dtype_video(): def sample_inputs_uniform_temporal_subsample_video(): - for video_loader in make_video_loaders(sizes=["random"], num_frames=[4]): + for video_loader in make_video_loaders(sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], num_frames=[4]): yield ArgsKwargs(video_loader, num_samples=2) @@ -2429,7 +1803,9 @@ def reference_uniform_temporal_subsample_video(x, num_samples): def reference_inputs_uniform_temporal_subsample_video(): - for video_loader in make_video_loaders(sizes=["random"], color_spaces=["RGB"], num_frames=[10]): + for video_loader in make_video_loaders( + sizes=[DEFAULT_PORTRAIT_SPATIAL_SIZE], color_spaces=["RGB"], num_frames=[10] + ): for num_samples in range(1, video_loader.shape[-4] + 1): yield ArgsKwargs(video_loader, num_samples) diff --git a/torchvision/csrc/io/image/cpu/decode_png.cpp b/torchvision/csrc/io/image/cpu/decode_png.cpp index b1ceaf1badd..d27eafe45a7 100644 --- a/torchvision/csrc/io/image/cpu/decode_png.cpp +++ b/torchvision/csrc/io/image/cpu/decode_png.cpp @@ -49,6 +49,7 @@ torch::Tensor decode_png( png_destroy_read_struct(&png_ptr, &info_ptr, nullptr); TORCH_CHECK(false, "Internal error."); } + TORCH_CHECK(datap_len >= 8, "Content is too small for png!") auto is_png = !png_sig_cmp(datap, 0, 8); TORCH_CHECK(is_png, "Content is not png!") diff --git a/torchvision/datapoints/_datapoint.py b/torchvision/datapoints/_datapoint.py index fe489d13ea0..0dabec58f25 100644 --- a/torchvision/datapoints/_datapoint.py +++ b/torchvision/datapoints/_datapoint.py @@ -1,7 +1,7 @@ from __future__ import annotations from types import ModuleType -from typing import Any, Callable, List, Mapping, Optional, Sequence, Tuple, Type, TypeVar, Union +from typing import Any, Callable, Dict, List, Mapping, Optional, Sequence, Tuple, Type, TypeVar, Union import PIL.Image import torch @@ -36,6 +36,7 @@ def wrap_like(cls: Type[D], other: D, tensor: torch.Tensor) -> D: _NO_WRAPPING_EXCEPTIONS = { torch.Tensor.clone: lambda cls, input, output: cls.wrap_like(input, output), torch.Tensor.to: lambda cls, input, output: cls.wrap_like(input, output), + torch.Tensor.detach: lambda cls, input, output: cls.wrap_like(input, output), # We don't need to wrap the output of `Tensor.requires_grad_`, since it is an inplace operation and thus # retains the type automatically torch.Tensor.requires_grad_: lambda cls, input, output: output, @@ -132,6 +133,15 @@ def dtype(self) -> _dtype: # type: ignore[override] with DisableTorchFunctionSubclass(): return super().dtype + def __deepcopy__(self: D, memo: Dict[int, Any]) -> D: + # We need to detach first, since a plain `Tensor.clone` will be part of the computation graph, which does + # *not* happen for `deepcopy(Tensor)`. A side-effect from detaching is that the `Tensor.requires_grad` + # attribute is cleared, so we need to refill it before we return. + # Note: We don't explicitly handle deep-copying of the metadata here. The only metadata we currently have is + # `BoundingBox.format` and `BoundingBox.spatial_size`, which are immutable and thus implicitly deep-copied by + # `BoundingBox.clone()`. 
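The `__deepcopy__` rationale above turns on an autograd detail: `Tensor.clone()` is differentiable and keeps the copy attached to the graph, whereas `deepcopy` of a plain tensor yields a detached leaf. A minimal demonstration with a plain tensor (torch only):

```python
import torch

t = torch.rand(3, requires_grad=True)
print(t.clone().grad_fn is not None)  # True: clone() records CloneBackward

# the detach-clone-requires_grad_ dance from the hunk above
copy = t.detach().clone().requires_grad_(t.requires_grad)
print(copy.grad_fn is None, copy.is_leaf, copy.requires_grad)  # True True True
```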
+ return self.detach().clone().requires_grad_(self.requires_grad) # type: ignore[return-value] + def horizontal_flip(self) -> Datapoint: return self diff --git a/torchvision/datasets/utils.py index 220c1ae79d5..b79b4ef4e61 100644 --- a/torchvision/datasets/utils.py +++ b/torchvision/datasets/utils.py @@ -57,7 +57,7 @@ def calculate_md5(fpath: str, chunk_size: int = 1024 * 1024) -> str: else: md5 = hashlib.md5() with open(fpath, "rb") as f: - for chunk in iter(lambda: f.read(chunk_size), b""): + while chunk := f.read(chunk_size): md5.update(chunk) return md5.hexdigest() diff --git a/torchvision/models/_api.py index 51db5c0b23e..0999bf7ba6b 100644 --- a/torchvision/models/_api.py +++ b/torchvision/models/_api.py @@ -1,3 +1,4 @@ +import fnmatch import importlib import inspect import sys @@ -6,7 +7,7 @@ from functools import partial from inspect import signature from types import ModuleType -from typing import Any, Callable, Dict, List, Mapping, Optional, Type, TypeVar, Union +from typing import Any, Callable, Dict, Iterable, List, Mapping, Optional, Set, Type, TypeVar, Union from torch import nn @@ -122,7 +123,9 @@ def get_weight(name: str) -> WeightsEnum: base_module_name = ".".join(sys.modules[__name__].__name__.split(".")[:-1]) base_module = importlib.import_module(base_module_name) model_modules = [base_module] + [ - x[1] for x in inspect.getmembers(base_module, inspect.ismodule) if x[1].__file__.endswith("__init__.py") + x[1] + for x in inspect.getmembers(base_module, inspect.ismodule) + if x[1].__file__.endswith("__init__.py") # type: ignore[union-attr] ] weights_enum = None @@ -201,19 +204,43 @@ def wrapper(fn: Callable[..., M]) -> Callable[..., M]: return wrapper -def list_models(module: Optional[ModuleType] = None) -> List[str]: +def list_models( + module: Optional[ModuleType] = None, + include: Union[Iterable[str], str, None] = None, + exclude: Union[Iterable[str], str, None] = None, +) -> List[str]: """ Returns a list with the names of registered models. Args: module (ModuleType, optional): The module from which we want to extract the available models. + include (str or Iterable[str], optional): Filter(s) for including the models from the set of all models. + Filters are passed to `fnmatch <https://docs.python.org/3/library/fnmatch.html>`__ to match Unix shell-style + wildcards. In case of multiple filters, the result is the union of the individual filters. + exclude (str or Iterable[str], optional): Filter(s) applied after the ``include`` filters to remove models. + Filters are passed to `fnmatch <https://docs.python.org/3/library/fnmatch.html>`__ to match Unix shell-style + wildcards. In case of multiple filters, the result is the removal of all models that match any individual filter. Returns: models (list): A list with the names of available models.
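The `calculate_md5` hunk swaps the `iter(callable, sentinel)` idiom for an assignment expression; both loops read fixed-size chunks until `read()` returns the empty-bytes sentinel. A side-by-side sketch of the two equivalent spellings (the walrus form needs Python 3.8+):

```python
import hashlib

def md5_iter(path, chunk_size=1024 * 1024):
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        # iter() calls f.read(chunk_size) until it returns the sentinel b""
        for chunk in iter(lambda: f.read(chunk_size), b""):
            md5.update(chunk)
    return md5.hexdigest()

def md5_walrus(path, chunk_size=1024 * 1024):
    md5 = hashlib.md5()
    with open(path, "rb") as f:
        # same termination condition: read() returns b"" (falsy) at EOF
        while chunk := f.read(chunk_size):
            md5.update(chunk)
    return md5.hexdigest()

# md5_iter(p) == md5_walrus(p) for any readable path p
```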
""" - models = [ + all_models = { k for k, v in BUILTIN_MODELS.items() if module is None or v.__module__.rsplit(".", 1)[0] == module.__name__ - ] + } + if include: + models: Set[str] = set() + if isinstance(include, str): + include = [include] + for include_filter in include: + models = models | set(fnmatch.filter(all_models, include_filter)) + else: + models = all_models + + if exclude: + if isinstance(exclude, str): + exclude = [exclude] + for exclude_filter in exclude: + models = models - set(fnmatch.filter(all_models, exclude_filter)) return sorted(models) diff --git a/torchvision/models/mobilenetv3.py b/torchvision/models/mobilenetv3.py index aa520e14962..1041d4d149f 100644 --- a/torchvision/models/mobilenetv3.py +++ b/torchvision/models/mobilenetv3.py @@ -378,7 +378,7 @@ def mobilenet_v3_large( weights are used. progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True. - **kwargs: parameters passed to the ``torchvision.models.resnet.MobileNetV3`` + **kwargs: parameters passed to the ``torchvision.models.mobilenet.MobileNetV3`` base class. Please refer to the `source code `_ for more details about this class. @@ -409,7 +409,7 @@ def mobilenet_v3_small( weights are used. progress (bool, optional): If True, displays a progress bar of the download to stderr. Default is True. - **kwargs: parameters passed to the ``torchvision.models.resnet.MobileNetV3`` + **kwargs: parameters passed to the ``torchvision.models.mobilenet.MobileNetV3`` base class. Please refer to the `source code `_ for more details about this class. diff --git a/torchvision/models/shufflenetv2.py b/torchvision/models/shufflenetv2.py index 52b85244b3d..3f3322b7a88 100644 --- a/torchvision/models/shufflenetv2.py +++ b/torchvision/models/shufflenetv2.py @@ -35,7 +35,7 @@ def channel_shuffle(x: Tensor, groups: int) -> Tensor: x = torch.transpose(x, 1, 2).contiguous() # flatten - x = x.view(batchsize, -1, height, width) + x = x.view(batchsize, num_channels, height, width) return x diff --git a/torchvision/prototype/datasets/_builtin/README.md b/torchvision/prototype/datasets/_builtin/README.md index 05d61c6870e..3b33100eb81 100644 --- a/torchvision/prototype/datasets/_builtin/README.md +++ b/torchvision/prototype/datasets/_builtin/README.md @@ -91,7 +91,7 @@ import hashlib def sha256sum(path, chunk_size=1024 * 1024): checksum = hashlib.sha256() with open(path, "rb") as f: - for chunk in iter(lambda: f.read(chunk_size), b""): + while chunk := f.read(chunk_size): checksum.update(chunk) print(checksum.hexdigest()) ``` diff --git a/torchvision/prototype/datasets/utils/_resource.py b/torchvision/prototype/datasets/utils/_resource.py index af4ede38dc0..dadec014b52 100644 --- a/torchvision/prototype/datasets/utils/_resource.py +++ b/torchvision/prototype/datasets/utils/_resource.py @@ -136,7 +136,7 @@ def download(self, root: Union[str, pathlib.Path], *, skip_integrity_check: bool def _check_sha256(self, path: pathlib.Path, *, chunk_size: int = 1024 * 1024) -> None: hash = hashlib.sha256() with open(path, "rb") as file: - for chunk in iter(lambda: file.read(chunk_size), b""): + while chunk := file.read(chunk_size): hash.update(chunk) sha256 = hash.hexdigest() if sha256 != self.sha256: diff --git a/torchvision/transforms/functional.py b/torchvision/transforms/functional.py index 2c2f1e19359..3e81005c6d6 100644 --- a/torchvision/transforms/functional.py +++ b/torchvision/transforms/functional.py @@ -1248,7 +1248,7 @@ def affine( # Looks like to_grayscale() is a stand-alone functional that is 
never called # from the transform classes. Perhaps it's still here for BC? I can't be -# bothered to dig. Anyway, this can be deprecated as we migrate to V2. +# bothered to dig. @torch.jit.unused def to_grayscale(img, num_output_channels=1): """Convert PIL image of any mode (RGB, HSV, LAB, etc) to grayscale version of image. diff --git a/torchvision/transforms/transforms.py index d0290f93249..38fc417204c 100644 --- a/torchvision/transforms/transforms.py +++ b/torchvision/transforms/transforms.py @@ -199,21 +199,21 @@ def forward(self, image): class ToPILImage: -    """Convert a tensor or an ndarray to PIL Image - this does not scale values. +    """Convert a tensor or an ndarray to PIL Image. This transform does not support torchscript. Converts a torch.*Tensor of shape C x H x W or a numpy ndarray of shape - H x W x C to a PIL Image while preserving the value range. + H x W x C to a PIL Image while adjusting the value range depending on the ``mode``. Args: mode (`PIL.Image mode`_): color space and pixel depth of input data (optional). If ``mode`` is ``None`` (default) there are some assumptions made about the input data: + - If the input has 4 channels, the ``mode`` is assumed to be ``RGBA``. - If the input has 3 channels, the ``mode`` is assumed to be ``RGB``. - If the input has 2 channels, the ``mode`` is assumed to be ``LA``. - - If the input has 1 channel, the ``mode`` is determined by the data type (i.e ``int``, ``float``, - ``short``). + - If the input has 1 channel, the ``mode`` is determined by the data type (i.e. ``int``, ``float``, ``short``). .. _PIL.Image mode: https://pillow.readthedocs.io/en/latest/handbook/concepts.html#concept-modes """ diff --git a/torchvision/transforms/v2/functional/__init__.py index ffb34c87748..b4803f4f1b9 100644 --- a/torchvision/transforms/v2/functional/__init__.py +++ b/torchvision/transforms/v2/functional/__init__.py @@ -76,6 +76,7 @@ solarize_image_pil, solarize_image_tensor, solarize_video, + to_grayscale, ) from ._geometry import ( affine, @@ -168,4 +169,4 @@ from ._temporal import uniform_temporal_subsample, uniform_temporal_subsample_video from ._type_conversion import pil_to_tensor, to_image_pil, to_image_tensor, to_pil_image -from ._deprecated import get_image_size, to_grayscale, to_tensor # usort: skip +from ._deprecated import get_image_size, to_tensor # usort: skip diff --git a/torchvision/transforms/v2/functional/_color.py index 4ba7e5b36b3..13417e4a990 100644 --- a/torchvision/transforms/v2/functional/_color.py +++ b/torchvision/transforms/v2/functional/_color.py @@ -56,6 +56,11 @@ def rgb_to_grayscale( ) +# `to_grayscale` actually predates `rgb_to_grayscale` in v1, but only handles PIL images. Since `rgb_to_grayscale` is a +# superset in terms of functionality and has the same signature, we alias here to avoid disruption.
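The alias relies on `rgb_to_grayscale` being a strict superset of the old PIL-only helper. For orientation, a sketch of the luma weighting torchvision's grayscale kernels use (ITU-R BT.601 coefficients, the same convention as PIL's ``L`` mode; `grayscale_luma` is a hypothetical standalone version, assuming a float CHW tensor):

```python
import torch

def grayscale_luma(image: torch.Tensor) -> torch.Tensor:
    r, g, b = image.unbind(dim=-3)  # split the channel dim
    return (0.2989 * r + 0.587 * g + 0.114 * b).unsqueeze(-3)

img = torch.rand(3, 5, 5)
print(grayscale_luma(img).shape)  # torch.Size([1, 5, 5])
```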
+to_grayscale = rgb_to_grayscale + + def _blend(image1: torch.Tensor, image2: torch.Tensor, ratio: float) -> torch.Tensor: ratio = float(ratio) fp = image1.is_floating_point() diff --git a/torchvision/transforms/v2/functional/_deprecated.py index 954daa97c21..c9a0f647e60 100644 --- a/torchvision/transforms/v2/functional/_deprecated.py +++ b/torchvision/transforms/v2/functional/_deprecated.py @@ -1,27 +1,12 @@ import warnings from typing import Any, List, Union -import PIL.Image import torch from torchvision import datapoints from torchvision.transforms import functional as _F -@torch.jit.unused -def to_grayscale(inpt: PIL.Image.Image, num_output_channels: int = 1) -> PIL.Image.Image: - call = ", num_output_channels=3" if num_output_channels == 3 else "" - replacement = "convert_color_space(..., color_space=datapoints.ColorSpace.GRAY)" - if num_output_channels == 3: - replacement = f"convert_color_space({replacement}, color_space=datapoints.ColorSpace.RGB)" - warnings.warn( - f"The function `to_grayscale(...{call})` is deprecated in will be removed in a future release. " - f"Instead, please use `{replacement}`.", - ) - - return _F.to_grayscale(inpt, num_output_channels=num_output_channels) - - @torch.jit.unused def to_tensor(inpt: Any) -> torch.Tensor: warnings.warn( diff --git a/torchvision/transforms/v2/functional/_geometry.py index aab3be24e0b..e1dd2866bc5 100644 --- a/torchvision/transforms/v2/functional/_geometry.py +++ b/torchvision/transforms/v2/functional/_geometry.py @@ -43,7 +43,8 @@ def horizontal_flip_image_tensor(image: torch.Tensor) -> torch.Tensor: return image.flip(-1) -horizontal_flip_image_pil = _FP.hflip +def horizontal_flip_image_pil(image: PIL.Image.Image) -> PIL.Image.Image: + return _FP.hflip(image) def horizontal_flip_mask(mask: torch.Tensor) -> torch.Tensor: @@ -92,7 +93,8 @@ def vertical_flip_image_tensor(image: torch.Tensor) -> torch.Tensor: return image.flip(-2) -vertical_flip_image_pil = _FP.vflip +def vertical_flip_image_pil(image: PIL.Image.Image) -> PIL.Image.Image: + return _FP.vflip(image) def vertical_flip_mask(mask: torch.Tensor) -> torch.Tensor: @@ -919,7 +921,6 @@ def rotate_image_pil( if center is not None and expand: warnings.warn("The provided center argument has no effect on the result if expand is True") - center = None return _FP.rotate( image, angle, interpolation=pil_modes_mapping[interpolation], expand=expand, fill=fill, center=center ) @@ -936,7 +937,6 @@ def rotate_bounding_box( ) -> Tuple[torch.Tensor, Tuple[int, int]]: if center is not None and expand: warnings.warn("The provided center argument has no effect on the result if expand is True") - center = None return _affine_bounding_box_with_expand( bounding_box, diff --git a/torchvision/utils.py index 1418656a7f2..6ec19a0e0a1 100644 --- a/torchvision/utils.py +++ b/torchvision/utils.py @@ -304,7 +304,10 @@ def draw_segmentation_masks( return image out_dtype = torch.uint8 - colors = [torch.tensor(color, dtype=out_dtype) for color in _parse_colors(colors, num_objects=num_masks)] + colors = [ + torch.tensor(color, dtype=out_dtype, device=image.device) + for color in _parse_colors(colors, num_objects=num_masks) + ] img_to_draw = image.detach().clone() # TODO: There might be a way to vectorize this
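The final hunk builds each color tensor on `image.device`. A minimal repro of the mismatch the change avoids, as a sketch assuming a CUDA-enabled build (the exact error text varies by PyTorch version):

```python
import torch

image = torch.zeros(3, 4, 4, dtype=torch.uint8, device="cuda")
mask = torch.ones(4, 4, dtype=torch.bool, device="cuda")
color = torch.tensor([255, 0, 0], dtype=torch.uint8)  # CPU by default
try:
    image[:, mask] = color[:, None]  # CUDA image, CPU value
except RuntimeError as e:
    print(e)  # device-mismatch error

color = torch.tensor([255, 0, 0], dtype=torch.uint8, device=image.device)
image[:, mask] = color[:, None]  # works once both sides share a device
```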