From b0bb4ca54b992f46795752f8befefd39919c34c2 Mon Sep 17 00:00:00 2001
From: Illia Vysochyn
Date: Fri, 18 Aug 2023 23:33:40 +0200
Subject: [PATCH] torchvision: Fix typos

---
 torchvision/datapoints/_dataset_wrapper.py | 6 +++---
 torchvision/datasets/_stereo_matching.py   | 2 +-
 torchvision/io/video_reader.py             | 6 +++---
 torchvision/transforms/v2/_geometry.py     | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/torchvision/datapoints/_dataset_wrapper.py b/torchvision/datapoints/_dataset_wrapper.py
index f1e7857264a..3f1c41debf5 100644
--- a/torchvision/datapoints/_dataset_wrapper.py
+++ b/torchvision/datapoints/_dataset_wrapper.py
@@ -37,17 +37,17 @@ def wrap_dataset_for_transforms_v2(dataset, target_keys=None):
     * :class:`~torchvision.datasets.CocoDetection`: Instead of returning the target as list of dicts, the wrapper
       returns a dict of lists. In addition, the key-value-pairs ``"boxes"`` (in ``XYXY`` coordinate format),
       ``"masks"`` and ``"labels"`` are added and wrap the data in the corresponding ``torchvision.datapoints``.
-      The original keys are preserved. If ``target_keys`` is ommitted, returns only the values for the
+      The original keys are preserved. If ``target_keys`` is omitted, returns only the values for the
       ``"image_id"``, ``"boxes"``, and ``"labels"``.
     * :class:`~torchvision.datasets.VOCDetection`: The key-value-pairs ``"boxes"`` and ``"labels"`` are added to
       the target and wrap the data in the corresponding ``torchvision.datapoints``. The original keys are
-      preserved. If ``target_keys`` is ommitted, returns only the values for the ``"boxes"`` and ``"labels"``.
+      preserved. If ``target_keys`` is omitted, returns only the values for the ``"boxes"`` and ``"labels"``.
     * :class:`~torchvision.datasets.CelebA`: The target for ``target_type="bbox"`` is converted to the ``XYXY``
       coordinate format and wrapped into a :class:`~torchvision.datapoints.BoundingBoxes` datapoint.
     * :class:`~torchvision.datasets.Kitti`: Instead returning the target as list of dicts, the wrapper returns a
       dict of lists. In addition, the key-value-pairs ``"boxes"`` and ``"labels"`` are added and wrap the data in
       the corresponding ``torchvision.datapoints``. The original keys are preserved. If ``target_keys`` is
-      ommitted, returns only the values for the ``"boxes"`` and ``"labels"``.
+      omitted, returns only the values for the ``"boxes"`` and ``"labels"``.
     * :class:`~torchvision.datasets.OxfordIIITPet`: The target for ``target_type="segmentation"`` is wrapped into a
       :class:`~torchvision.datapoints.Mask` datapoint.
     * :class:`~torchvision.datasets.Cityscapes`: The target for ``target_type="semantic"`` is wrapped into a
diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py
index b07161d277c..c180e2e1eb8 100644
--- a/torchvision/datasets/_stereo_matching.py
+++ b/torchvision/datasets/_stereo_matching.py
@@ -796,7 +796,7 @@ def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
         # in order to extract disparity from depth maps
         camera_settings_path = Path(file_path).parent / "_camera_settings.json"
         with open(camera_settings_path, "r") as f:
-            # inverse of depth-from-disparity equation: depth = (baseline * focal) / (disparity * pixel_constatnt)
+            # inverse of depth-from-disparity equation: depth = (baseline * focal) / (disparity * pixel_constant)
             intrinsics = json.load(f)
             focal = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"]
             baseline, pixel_constant = 6, 100  # pixel constant is inverted
diff --git a/torchvision/io/video_reader.py b/torchvision/io/video_reader.py
index 1cdcb267d73..0107c82019b 100644
--- a/torchvision/io/video_reader.py
+++ b/torchvision/io/video_reader.py
@@ -91,14 +91,14 @@ class VideoReader:
 
     Each stream descriptor consists of two parts: stream type (e.g. 'video') and
     a unique stream id (which are determined by the video encoding).
-    In this way, if the video contaner contains multiple
+    In this way, if the video container contains multiple
     streams of the same type, users can access the one they want.
     If only stream type is passed, the decoder auto-detects first stream of that type.
 
     Args:
         src (string, bytes object, or tensor): The media source.
            If string-type, it must be a file path supported by FFMPEG.
-            If bytes should be an in memory representatin of a file supported by FFMPEG.
+            If bytes, should be an in-memory representation of a file supported by FFMPEG.
            If Tensor, it is interpreted internally as byte buffer.
            It must be one-dimensional, of type ``torch.uint8``.
 
@@ -279,7 +279,7 @@ def set_current_stream(self, stream: str) -> bool:
         Currently available stream types include ``['video', 'audio']``.
         Each descriptor consists of two parts: stream type (e.g. 'video') and
         a unique stream id (which are determined by video encoding).
-        In this way, if the video contaner contains multiple
+        In this way, if the video container contains multiple
         streams of the same type, users can access the one they want.
         If only stream type is passed, the decoder auto-detects first stream
         of that type and returns it.
diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py
index 0be62ae8a12..a442b2d4be0 100644
--- a/torchvision/transforms/v2/_geometry.py
+++ b/torchvision/transforms/v2/_geometry.py
@@ -1023,7 +1023,7 @@ class ElasticTransform(Transform):
 
     .. note::
         Implementation to transform bounding boxes is approximative (not exact).
-        We construct an approximation of the inverse grid as ``inverse_grid = idenity - displacement``.
+        We construct an approximation of the inverse grid as ``inverse_grid = identity - displacement``.
         This is not an exact inverse of the grid used to transform images, i.e. ``grid = identity + displacement``.
         Our assumption is that ``displacement * displacement`` is small and can be ignored.
         Large displacements would lead to large errors in the approximation.
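
The ``wrap_dataset_for_transforms_v2`` docstring touched above describes how the wrapper re-packages detection targets. A minimal usage sketch, assuming this torchvision version exports the function from ``torchvision.datapoints`` and using placeholder dataset paths:

    from torchvision import datapoints, datasets

    # Placeholder paths; any CocoDetection-compatible annotation file works here.
    coco = datasets.CocoDetection(root="path/to/images", annFile="path/to/annotations.json")

    # The wrapper returns a dict of lists instead of a list of dicts and wraps
    # "boxes", "masks" and "labels" in the corresponding torchvision.datapoints.
    # With target_keys omitted, only "image_id", "boxes" and "labels" are kept.
    wrapped = datapoints.wrap_dataset_for_transforms_v2(coco, target_keys={"boxes", "masks", "labels"})

    image, target = wrapped[0]
    print(sorted(target))          # ['boxes', 'labels', 'masks']
    print(type(target["boxes"]))   # torchvision.datapoints.BoundingBoxes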
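
The comment corrected in ``_stereo_matching.py`` documents the depth-from-disparity relation ``depth = (baseline * focal) / (disparity * pixel_constant)``. A small numeric sketch of the inversion that relation implies; the focal length and depth values below are made up for illustration, only ``baseline, pixel_constant = 6, 100`` comes from the surrounding code:

    import numpy as np

    # Illustrative numbers; the real code reads `focal` from _camera_settings.json.
    baseline, focal, pixel_constant = 6.0, 768.0, 100.0
    depth = np.array([[10.0, 20.0], [40.0, 80.0]])

    # Rearranging depth = (baseline * focal) / (disparity * pixel_constant)
    # gives disparity = (baseline * focal) / (depth * pixel_constant).
    disparity = (baseline * focal) / (depth * pixel_constant)

    # Round trip: plugging the disparity back into the documented equation
    # recovers the original depth map.
    assert np.allclose((baseline * focal) / (disparity * pixel_constant), depth)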
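
The ``VideoReader`` docstring fixed above explains stream descriptors made of a stream type and a stream id. A short usage sketch, assuming a local ``video.mp4`` that contains both a video and an audio stream, and a torchvision build with video decoding enabled:

    import torchvision

    # Pass an explicit "type:id" descriptor; passing just "video" would let the
    # decoder auto-detect the first video stream in the container.
    reader = torchvision.io.VideoReader("video.mp4", "video:0")
    frames = [frame["data"] for frame in reader]  # decoded frames as uint8 tensors

    # Switch to the first audio stream of the same container.
    reader.set_current_stream("audio:0")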
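
The ``ElasticTransform`` note fixed above approximates the inverse warp as ``identity - displacement`` rather than exactly inverting ``grid = identity + displacement``. A one-dimensional toy sketch (the sinusoidal displacement field is an arbitrary choice) showing that the round-trip error shrinks with ``displacement * displacement``, as the note assumes:

    import math

    import torch

    x = torch.linspace(0.0, 1.0, steps=101)

    def displacement(p, scale):
        # a smooth displacement field whose magnitude is controlled by `scale`
        return scale * torch.sin(2.0 * math.pi * p)

    for scale in (0.1, 0.01, 0.001):
        forward = x + displacement(x, scale)           # grid = identity + displacement
        back = forward - displacement(forward, scale)  # inverse_grid = identity - displacement
        err = (back - x).abs().max().item()
        # The residual is roughly quadratic in `scale`, i.e. on the order of
        # displacement * displacement, which the note treats as negligible.
        print(f"scale={scale:g}  max round-trip error={err:.1e}")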