Revert "Mergeback 1.7.0 to develop" (#1539)

openvinotoolkit · Jun 19, 2024 · e8be623
1 parent 31f712a
commit e8be623
Show file tree

Hide file tree

Showing 31 changed files with 248 additions and 928 deletions.
diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml
@@ -76,14 +76,3 @@ jobs:
       uses: github/codeql-action/analyze@f079b8493333aace61c81488f8bd40919487bd9f # v3.25.7
       with:
         category: "/language:${{matrix.language}}"
-    - name: Generate Security Report
-      uses: rsdmike/github-security-report-action@a149b24539044c92786ec39af8ba38c93496495d # v3.0.4
-      with:
-        outputDir: ${{matrix.language}}
-        template: report
-        token: ${{ secrets.GITHUB_TOKEN }}
-    - name: GitHub Upload Release Artifacts
-      uses: actions/upload-artifact@1746f4ab65b179e0ea60a494b83293b640dd5bba # v4.3.2
-      with:
-        name: codeql-report-${{matrix.language}}
-        path: "./${{matrix.language}}/report.pdf"
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,21 +7,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## \[unreleased\]
 ### New features
-- Add TabularValidator
-  (<https://github.com/openvinotoolkit/datumaro/pull/1498>)
-- Add Clean Transform for tabular data type
-  (<https://github.com/openvinotoolkit/datumaro/pull/1520>)
-
-### Enhancements
-- Set label name with parents to avoid duplicates for AstypeAnnotations
-  (<https://github.com/openvinotoolkit/datumaro/pull/1492>)
-- Pass Keyword Argument to TabularDataBase
-  (<https://github.com/openvinotoolkit/datumaro/pull/1522>)
-
-## Q2 2024 Release 1.7.0
-### New features
-- Support 'Video' media type in datumaro format
-  (<https://github.com/openvinotoolkit/datumaro/pull/1491>)
 - Add ann_types property for dataset
   (<https://github.com/openvinotoolkit/datumaro/pull/1422>, <https://github.com/openvinotoolkit/datumaro/pull/1479>)
 - Add AnnotationType.rotated_bbox for oriented object detection
@@ -30,8 +15,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/openvinotoolkit/datumaro/pull/1475>)
 - Add AstypeAnnotations Transform
   (<https://github.com/openvinotoolkit/datumaro/pull/1484>)
-- Enhance DatasetItem annotations for semantic segmentation model training use case
-  (<https://github.com/openvinotoolkit/datumaro/pull/1503>)
+- Add TabularValidator
+  (<https://github.com/openvinotoolkit/datumaro/pull/1498>)
+- Add Clean Transform for tabular data type
+  (<https://github.com/openvinotoolkit/datumaro/pull/1520>)
 
 ### Enhancements
 - Fix ambiguous COCO format detector
@@ -40,12 +27,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   (<https://github.com/openvinotoolkit/datumaro/pull/1471>)
 - Add ExtractedMask and update importers who can use it to use it
   (<https://github.com/openvinotoolkit/datumaro/pull/1480>)
-- Improve PIL and COLOR_BGR context image decode performance
-  (<https://github.com/openvinotoolkit/datumaro/pull/1501>)
-- Improve get_area() of Polygon through Shoelace formula
-  (<https://github.com/openvinotoolkit/datumaro/pull/1507>)
-- Improve _Shape point converter
-  (<https://github.com/openvinotoolkit/datumaro/pull/1508>)
+- Set label name with parents to avoid duplicates for AstypeAnnotations
+  (<https://github.com/openvinotoolkit/datumaro/pull/1492>)
+- Pass Keyword Argument to TabularDataBase
+  (<https://github.com/openvinotoolkit/datumaro/pull/1522>)
 
 ### Bug fixes
 - Split the video directory into subsets to avoid overwriting

diff --git a/docs/source/docs/command-reference/context/util.md b/docs/source/docs/command-reference/context/util.md
@@ -17,7 +17,6 @@ the dataset reproducible and stable.
 This command provides different options like setting the frame step
 (the `-s/--step` option), file name pattern (`-n/--name-pattern`),
 starting (`-b/--start-frame`) and finishing (`-e/--end-frame`) frame etc.
-Note that starting and finishing frames denote a closed interval [`start-frame`, `end-frame`].
 
 Note that this command is equivalent to the following commands:
 ```bash

diff --git a/docs/source/docs/data-formats/formats/datumaro.md b/docs/source/docs/data-formats/formats/datumaro.md
@@ -11,7 +11,6 @@ Supported media types:
 
 - `Image`
 - `PointCloud`
-- `Video`
 - `VideoFrame`
 
 Supported annotation types:

diff --git a/docs/source/docs/data-formats/formats/datumaro_binary.md b/docs/source/docs/data-formats/formats/datumaro_binary.md
@@ -59,7 +59,6 @@ Supported media types:
 
 - `Image`
 - `PointCloud`
-- `Video`
 - `VideoFrame`
 
 Supported annotation types:

diff --git a/docs/source/docs/data-formats/formats/video.md b/docs/source/docs/data-formats/formats/video.md
@@ -31,7 +31,6 @@ dataset = dm.Dataset.import_from('<path_to_video>', format='video_frames')
 
 Datumaro has few import options for `video_frames` format, to apply them
 use the `--` after the main command argument.
-Note that a video has a closed interval of [`start-frame`, `end-frame`].
 
 `video_frames` import options:
 - `--subset` (string) - The name of the subset for the produced

diff --git a/docs/source/docs/release_notes.rst b/docs/source/docs/release_notes.rst
@@ -3,38 +3,6 @@ Release Notes
 
 .. toctree::
    :maxdepth: 1
-
-v1.7.0 (2024 Q2)
-----------------
-
-New features
-^^^^^^^^^^^^
-- Add ann_types property for dataset
-- Add AnnotationType.rotated_bbox for oriented object detection
-- Add DOTA data format for oriented object detection task
-- Add AstypeAnnotations Transform
-
-Enhancements
-^^^^^^^^^^^^
-- Fix ambiguous COCO format detector
-- Get target information for tabular dataset
-- Add ExtractedMask and update importers who can use it to use it
-
-v1.6.1 (2024.05)
-----------------
-
-Enhancements
-^^^^^^^^^^^^
-- Prevent AcLauncher for OpenVINO 2024.0
-
-Bug fixes
-^^^^^^^^^
-- Modify lxml dependency constraint
-- Fix CLI error occurring when installed with default option only
-- Relax Pillow dependency constraint
-- Modify Numpy dependency constraint
-- Relax old pandas version constraint
-
 v1.6.0 (2024.04)
 ----------------
 

diff --git a/src/datumaro/components/annotation.py b/src/datumaro/components/annotation.py
@@ -683,8 +683,11 @@ def lazy_extract(self, instance_id: int) -> Callable[[], IndexMaskImage]:
 
 @attrs(slots=True, order=False)
 class _Shape(Annotation):
+    # Flattened list of point coordinates
     points: List[float] = field(
-        converter=lambda x: np.array(x, dtype=np.float32).round(COORDINATE_ROUNDING_DIGITS).tolist()
+        converter=lambda x: np.around(
+            np.array(x, dtype=np.float32), COORDINATE_ROUNDING_DIGITS
+        ).tolist()
     )
 
     label: Optional[int] = field(
@@ -809,12 +812,11 @@ def __attrs_post_init__(self):
         )
 
     def get_area(self):
-        # import pycocotools.mask as mask_utils
+        import pycocotools.mask as mask_utils
 
-        # x, y, w, h = self.get_bbox()
-        # rle = mask_utils.frPyObjects([self.points], y + h, x + w)
-        # area = mask_utils.area(rle)[0]
-        area = self._get_shoelace_area()
+        x, y, w, h = self.get_bbox()
+        rle = mask_utils.frPyObjects([self.points], y + h, x + w)
+        area = mask_utils.area(rle)[0]
         return area
 
     def as_polygon(self) -> List[float]:
@@ -840,21 +842,6 @@ def __eq__(self, other):
         inter_area = self_polygon.intersection(other_polygon).area
         return abs(self_polygon.area - inter_area) < CHECK_POLYGON_EQ_EPSILONE
 
-    def _get_shoelace_area(self):
-        points = self.get_points()
-        n = len(points)
-        # Not a polygon
-        if n < 3:
-            return 0
-
-        area = 0.0
-        for i in range(n):
-            x1, y1 = points[i]
-            x2, y2 = points[(i + 1) % n]  # Next vertex, wrapping around using modulo
-            area += x1 * y2 - y1 * x2
-
-        return abs(area) / 2.0
-
 
 @attrs(slots=True, init=False, order=False)
 class Bbox(_Shape):
@@ -1381,69 +1368,3 @@ class Tabular(Annotation):
 
     _type = AnnotationType.tabular
     values: Dict[str, TableDtype] = field(converter=dict)
-
-
-class Annotations(List[Annotation]):
-    """List of `Annotation` equipped with additional utility functions."""
-
-    def get_semantic_seg_mask(
-        self, ignore_index: int = 0, dtype: np.dtype = np.uint8
-    ) -> np.ndarray:
-        """Extract semantic segmentation mask from a collection of Datumaro `Mask`s.
-
-        Args:
-            ignore_index: Scalar value to fill in the zeros in each binary mask
-                before merging into a semantic segmentation mask. This value is usually used
-                to represent a pixel denoting a not-interested region. Defaults to 0.
-            dtype: Data type for the resulting mask. Defaults to np.uint8.
-
-        Returns:
-            Semantic segmentation mask generated by merging Datumaro `Mask`s.
-
-        Raises:
-            ValueError: If there are no mask annotations or if there is an inconsistency in mask sizes.
-        """
-
-        masks = [ann for ann in self if isinstance(ann, Mask)]
-        # Mask with a lower z_order value will come first
-        masks.sort(key=lambda mask: mask.z_order)
-
-        if not masks:
-            msg = "There is no mask annotations."
-            raise ValueError(msg)
-
-        # Dispatching for better performance
-        # If all masks are `ExtractedMask`, share a same source `index_mask`, and
-        # there is no label remapping.
-        if (
-            all(isinstance(mask, ExtractedMask) for mask in masks)
-            # and set(id(mask.index_mask) for mask in masks) == 1
-            and all(mask.index_mask == next(iter(masks)).index_mask for mask in masks)
-            and all(mask.index == mask.label for mask in masks)
-        ):
-            index_mask = next(iter(masks)).index_mask
-            semantic_seg_mask: np.ndarray = index_mask() if callable(index_mask) else index_mask
-            if semantic_seg_mask.dtype != dtype:
-                semantic_seg_mask = semantic_seg_mask.astype(dtype)
-
-            labels = np.unique(np.array([mask.label for mask in masks]))
-            ignore_index_mask = np.isin(semantic_seg_mask, labels, invert=True)
-
-            return np.where(ignore_index_mask, ignore_index, semantic_seg_mask)
-
-        class_masks = [mask.as_class_mask(ignore_index=ignore_index, dtype=dtype) for mask in masks]
-
-        max_h = max([mask.shape[0] for mask in class_masks])
-        max_w = max([mask.shape[1] for mask in class_masks])
-
-        semantic_seg_mask = np.full(shape=(max_h, max_w), fill_value=ignore_index, dtype=dtype)
-
-        for class_mask in class_masks:
-            if class_mask.shape != semantic_seg_mask.shape:
-                msg = f"There is inconsistency in mask size: {class_mask.shape}!={semantic_seg_mask.shape}."
-                raise ValueError(msg, class_mask.shape, semantic_seg_mask.shape)
-
-            ignore_index_mask = class_mask == ignore_index
-            semantic_seg_mask = np.where(ignore_index_mask, semantic_seg_mask, class_mask)
-
-        return semantic_seg_mask
diff --git a/src/datumaro/components/dataset.py b/src/datumaro/components/dataset.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2020-2024 Intel Corporation
+# Copyright (C) 2020-2023 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -41,7 +41,6 @@
 from datumaro.components.environment import DEFAULT_ENVIRONMENT, Environment
 from datumaro.components.errors import (
     DatasetImportError,
-    DatumaroError,
     MultipleFormatsMatchError,
     NoMatchingFormatsError,
     StreamedItemError,
@@ -889,10 +888,6 @@ def import_from(
             cause = e.__cause__ if getattr(e, "__cause__", None) is not None else e
             cause.__traceback__ = e.__traceback__
             raise DatasetImportError(f"Failed to import dataset '{format}' at '{path}'.") from cause
-        except DatumaroError as e:
-            cause = e.__cause__ if getattr(e, "__cause__", None) is not None else e
-            cause.__traceback__ = e.__traceback__
-            raise DatasetImportError(f"Failed to import dataset '{format}' at '{path}'.") from cause
         except Exception as e:
             raise DatasetImportError(f"Failed to import dataset '{format}' at '{path}'.") from e
 

diff --git a/src/datumaro/components/dataset_base.py b/src/datumaro/components/dataset_base.py
@@ -9,7 +9,7 @@
 import attr
 from attr import attrs, field
 
-from datumaro.components.annotation import Annotation, Annotations, AnnotationType, Categories
+from datumaro.components.annotation import Annotation, AnnotationType, Categories
 from datumaro.components.cli_plugin import CliPlugin
 from datumaro.components.contexts.importer import ImportContext, NullImportContext
 from datumaro.components.media import Image, MediaElement
@@ -29,7 +29,7 @@ class DatasetItem:
         default=None, validator=attr.validators.optional(attr.validators.instance_of(MediaElement))
     )
 
-    annotations: Annotations = field(factory=Annotations, validator=default_if_none(Annotations))
+    annotations: List[Annotation] = field(factory=list, validator=default_if_none(list))
 
     attributes: Dict[str, Any] = field(factory=dict, validator=default_if_none(dict))
 

diff --git a/src/datumaro/components/exporter.py b/src/datumaro/components/exporter.py
@@ -1,4 +1,4 @@
-# Copyright (C) 2019-2024 Intel Corporation
+# Copyright (C) 2019-2023 Intel Corporation
 #
 # SPDX-License-Identifier: MIT
 
@@ -22,7 +22,7 @@
     DatumaroError,
     ItemExportError,
 )
-from datumaro.components.media import Image, PointCloud, Video, VideoFrame
+from datumaro.components.media import Image, PointCloud, VideoFrame
 from datumaro.components.progress_reporting import NullProgressReporter, ProgressReporter
 from datumaro.util.meta_file_util import save_hashkey_file, save_meta_file
 from datumaro.util.os_util import rmtree
@@ -339,15 +339,10 @@ def make_pcd_extra_image_filename(self, item, idx, image, *, name=None, subdir=N
         ) + self.find_image_ext(image)
 
     def make_video_filename(self, item, *, name=None):
-        STR_WRONG_MEDIA_TYPE = "Video item's media type should be Video or VideoFrame"
-        assert isinstance(item, DatasetItem), STR_WRONG_MEDIA_TYPE
-
-        if isinstance(item.media, VideoFrame):
+        if isinstance(item, DatasetItem) and isinstance(item.media, VideoFrame):
             video_file_name = osp.basename(item.media.video.path)
-        elif isinstance(item.media, Video):
-            video_file_name = osp.basename(item.media.path)
         else:
-            assert False, STR_WRONG_MEDIA_TYPE
+            assert "Video item type should be VideoFrame"
 
         return video_file_name
 
@@ -408,7 +403,7 @@ def save_video(
         subdir: Optional[str] = None,
         fname: Optional[str] = None,
     ):
-        if not item.media or not isinstance(item.media, (Video, VideoFrame)):
+        if not item.media or not isinstance(item.media, VideoFrame):
             log.warning("Item '%s' has no video", item.id)
             return
         basedir = self._video_dir if basedir is None else basedir
@@ -420,10 +415,7 @@ def save_video(
 
         os.makedirs(osp.dirname(path), exist_ok=True)
 
-        if isinstance(item.media, VideoFrame):
-            item.media.video.save(path, crypter=NULL_CRYPTER)
-        else:  # Video
-            item.media.save(path, crypter=NULL_CRYPTER)
+        item.media.video.save(path, crypter=NULL_CRYPTER)
 
     @property
     def images_dir(self) -> str: