From b0bb4ca54b992f46795752f8befefd39919c34c2 Mon Sep 17 00:00:00 2001
From: Illia Vysochyn
Date: Fri, 18 Aug 2023 23:33:40 +0200
Subject: [PATCH] torchvision: Fix typos

---
 torchvision/datapoints/_dataset_wrapper.py | 6 +++---
 torchvision/datasets/_stereo_matching.py   | 2 +-
 torchvision/io/video_reader.py             | 6 +++---
 torchvision/transforms/v2/_geometry.py     | 2 +-
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/torchvision/datapoints/_dataset_wrapper.py b/torchvision/datapoints/_dataset_wrapper.py
index f1e7857264a..3f1c41debf5 100644
--- a/torchvision/datapoints/_dataset_wrapper.py
+++ b/torchvision/datapoints/_dataset_wrapper.py
@@ -37,17 +37,17 @@ def wrap_dataset_for_transforms_v2(dataset, target_keys=None):
     * :class:`~torchvision.datasets.CocoDetection`: Instead of returning the target as list of dicts, the wrapper
       returns a dict of lists. In addition, the key-value-pairs ``"boxes"`` (in ``XYXY`` coordinate format),
       ``"masks"`` and ``"labels"`` are added and wrap the data in the corresponding ``torchvision.datapoints``.
-      The original keys are preserved. If ``target_keys`` is ommitted, returns only the values for the
+      The original keys are preserved. If ``target_keys`` is omitted, returns only the values for the
       ``"image_id"``, ``"boxes"``, and ``"labels"``.
     * :class:`~torchvision.datasets.VOCDetection`: The key-value-pairs ``"boxes"`` and ``"labels"`` are added to
       the target and wrap the data in the corresponding ``torchvision.datapoints``. The original keys are
-      preserved. If ``target_keys`` is ommitted, returns only the values for the ``"boxes"`` and ``"labels"``.
+      preserved. If ``target_keys`` is omitted, returns only the values for the ``"boxes"`` and ``"labels"``.
     * :class:`~torchvision.datasets.CelebA`: The target for ``target_type="bbox"`` is converted to the ``XYXY``
       coordinate format and wrapped into a :class:`~torchvision.datapoints.BoundingBoxes` datapoint.
     * :class:`~torchvision.datasets.Kitti`: Instead returning the target as list of dicts, the wrapper returns a
       dict of lists. In addition, the key-value-pairs ``"boxes"`` and ``"labels"`` are added and wrap the data in
       the corresponding ``torchvision.datapoints``. The original keys are preserved. If ``target_keys`` is
-      ommitted, returns only the values for the ``"boxes"`` and ``"labels"``.
+      omitted, returns only the values for the ``"boxes"`` and ``"labels"``.
     * :class:`~torchvision.datasets.OxfordIIITPet`: The target for ``target_type="segmentation"`` is wrapped into a
       :class:`~torchvision.datapoints.Mask` datapoint.
     * :class:`~torchvision.datasets.Cityscapes`: The target for ``target_type="semantic"`` is wrapped into a
diff --git a/torchvision/datasets/_stereo_matching.py b/torchvision/datasets/_stereo_matching.py
index b07161d277c..c180e2e1eb8 100644
--- a/torchvision/datasets/_stereo_matching.py
+++ b/torchvision/datasets/_stereo_matching.py
@@ -796,7 +796,7 @@ def _read_disparity(self, file_path: str) -> Tuple[np.ndarray, None]:
         # in order to extract disparity from depth maps
         camera_settings_path = Path(file_path).parent / "_camera_settings.json"
         with open(camera_settings_path, "r") as f:
-            # inverse of depth-from-disparity equation: depth = (baseline * focal) / (disparity * pixel_constatnt)
+            # inverse of depth-from-disparity equation: depth = (baseline * focal) / (disparity * pixel_constant)
             intrinsics = json.load(f)
             focal = intrinsics["camera_settings"][0]["intrinsic_settings"]["fx"]
             baseline, pixel_constant = 6, 100  # pixel constant is inverted
diff --git a/torchvision/io/video_reader.py b/torchvision/io/video_reader.py
index 1cdcb267d73..0107c82019b 100644
--- a/torchvision/io/video_reader.py
+++ b/torchvision/io/video_reader.py
@@ -91,14 +91,14 @@ class VideoReader:
 
     Each stream descriptor consists of two parts: stream type (e.g. 'video') and
     a unique stream id (which are determined by the video encoding).
-    In this way, if the video contaner contains multiple
+    In this way, if the video container contains multiple
     streams of the same type, users can access the one they want.
     If only stream type is passed, the decoder auto-detects first stream of that type.
 
     Args:
         src (string, bytes object, or tensor): The media source.
            If string-type, it must be a file path supported by FFMPEG.
-            If bytes should be an in memory representatin of a file supported by FFMPEG.
+            If bytes, should be an in-memory representation of a file supported by FFMPEG.
            If Tensor, it is interpreted internally as byte buffer.
            It must be one-dimensional, of type ``torch.uint8``.
 
@@ -279,7 +279,7 @@ def set_current_stream(self, stream: str) -> bool:
         Currently available stream types include ``['video', 'audio']``.
         Each descriptor consists of two parts: stream type (e.g. 'video') and
         a unique stream id (which are determined by video encoding).
-        In this way, if the video contaner contains multiple
+        In this way, if the video container contains multiple
         streams of the same type, users can access the one they want.
         If only stream type is passed, the decoder auto-detects first stream
         of that type and returns it.
diff --git a/torchvision/transforms/v2/_geometry.py b/torchvision/transforms/v2/_geometry.py
index 0be62ae8a12..a442b2d4be0 100644
--- a/torchvision/transforms/v2/_geometry.py
+++ b/torchvision/transforms/v2/_geometry.py
@@ -1023,7 +1023,7 @@ class ElasticTransform(Transform):
 
     .. note::
         Implementation to transform bounding boxes is approximative (not exact).
-        We construct an approximation of the inverse grid as ``inverse_grid = idenity - displacement``.
+        We construct an approximation of the inverse grid as ``inverse_grid = identity - displacement``.
         This is not an exact inverse of the grid used to transform images, i.e. ``grid = identity + displacement``.
         Our assumption is that ``displacement * displacement`` is small and can be ignored.
         Large displacements would lead to large errors in the approximation.
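
The ``wrap_dataset_for_transforms_v2`` docstring touched above describes how the wrapper re-packages detection targets. A minimal usage sketch, assuming this torchvision version exports the function from ``torchvision.datapoints`` and using placeholder dataset paths:

    from torchvision import datapoints, datasets

    # Placeholder paths; any CocoDetection-compatible annotation file works here.
    coco = datasets.CocoDetection(root="path/to/images", annFile="path/to/annotations.json")

    # The wrapper returns a dict of lists instead of a list of dicts and wraps
    # "boxes", "masks" and "labels" in the corresponding torchvision.datapoints.
    # With target_keys omitted, only "image_id", "boxes" and "labels" are kept.
    wrapped = datapoints.wrap_dataset_for_transforms_v2(coco, target_keys={"boxes", "masks", "labels"})

    image, target = wrapped[0]
    print(sorted(target))          # ['boxes', 'labels', 'masks']
    print(type(target["boxes"]))   # torchvision.datapoints.BoundingBoxes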
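
The comment corrected in ``_stereo_matching.py`` documents the depth-from-disparity relation ``depth = (baseline * focal) / (disparity * pixel_constant)``. A small numeric sketch of the inversion that relation implies; the focal length and depth values below are made up for illustration, only ``baseline, pixel_constant = 6, 100`` comes from the surrounding code:

    import numpy as np

    # Illustrative numbers; the real code reads `focal` from _camera_settings.json.
    baseline, focal, pixel_constant = 6.0, 768.0, 100.0
    depth = np.array([[10.0, 20.0], [40.0, 80.0]])

    # Rearranging depth = (baseline * focal) / (disparity * pixel_constant)
    # gives disparity = (baseline * focal) / (depth * pixel_constant).
    disparity = (baseline * focal) / (depth * pixel_constant)

    # Round trip: plugging the disparity back into the documented equation
    # recovers the original depth map.
    assert np.allclose((baseline * focal) / (disparity * pixel_constant), depth)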
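
The ``VideoReader`` docstring fixed above explains stream descriptors made of a stream type and a stream id. A short usage sketch, assuming a local ``video.mp4`` that contains both a video and an audio stream, and a torchvision build with video decoding enabled:

    import torchvision

    # Pass an explicit "type:id" descriptor; passing just "video" would let the
    # decoder auto-detect the first video stream in the container.
    reader = torchvision.io.VideoReader("video.mp4", "video:0")
    frames = [frame["data"] for frame in reader]  # decoded frames as uint8 tensors

    # Switch to the first audio stream of the same container.
    reader.set_current_stream("audio:0")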
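
The ``ElasticTransform`` note fixed above approximates the inverse warp as ``identity - displacement`` rather than exactly inverting ``grid = identity + displacement``. A one-dimensional toy sketch (the sinusoidal displacement field is an arbitrary choice) showing that the round-trip error shrinks with ``displacement * displacement``, as the note assumes:

    import math

    import torch

    x = torch.linspace(0.0, 1.0, steps=101)

    def displacement(p, scale):
        # a smooth displacement field whose magnitude is controlled by `scale`
        return scale * torch.sin(2.0 * math.pi * p)

    for scale in (0.1, 0.01, 0.001):
        forward = x + displacement(x, scale)           # grid = identity + displacement
        back = forward - displacement(forward, scale)  # inverse_grid = identity - displacement
        err = (back - x).abs().max().item()
        # The residual is roughly quadratic in `scale`, i.e. on the order of
        # displacement * displacement, which the note treats as negligible.
        print(f"scale={scale:g}  max round-trip error={err:.1e}")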